http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java index e001ca4..c909545 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java @@ -37,10 +37,10 @@ public interface CandidateTable { String getStorageString(String alias); /** - * Get storage tables corresponding to this candidate + * Get storage table corresponding to this candidate * @return */ - Set<String> getStorageTables(); + String getStorageTable(); /** * Get candidate table @@ -73,5 +73,5 @@ public interface CandidateTable { /** * Get partitions queried */ - Set<?> getPartsQueried(); + Set<?> getParticipatingPartitions(); }
http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java index bd6e27c..1c0d356 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,12 +18,17 @@ */ package org.apache.lens.cube.parse; +import static java.util.stream.Collectors.toSet; + import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.*; +import static com.google.common.collect.Lists.newArrayList; + import java.util.*; import org.apache.lens.cube.metadata.TimeRange; + import org.codehaus.jackson.annotate.JsonWriteNullProperties; import com.google.common.collect.Lists; @@ -43,43 +48,72 @@ public class CandidateTablePruneCause { public enum CandidateTablePruneCode { // other fact set element is removed ELEMENT_IN_SET_PRUNED("Other candidate from measure covering set is pruned"), - FACT_NOT_AVAILABLE_IN_RANGE("No facts available for all of these time ranges: %s") { - @Override - Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { - Set<TimeRange> allRanges = Sets.newHashSet(); - for (CandidateTablePruneCause cause : causes) { - allRanges.addAll(cause.getInvalidRanges()); - } - return new Object[]{ - allRanges.toString(), - }; - } - }, // least weight not satisfied MORE_WEIGHT("Picked table had more weight than minimum."), // partial data is enabled, another fact has more data. 
LESS_DATA("Picked table has less data than the maximum"), // cube table has more partitions MORE_PARTITIONS("Picked table has more partitions than minimum"), + // storage is not supported by execution engine/driver + UNSUPPORTED_STORAGE("Unsupported Storage"), // invalid cube table INVALID("Invalid cube table provided in query"), // expression is not evaluable in the candidate - EXPRESSION_NOT_EVALUABLE("%s expressions not evaluable") { + COLUMN_NOT_FOUND("%s are not %s") { Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { - List<String> columns = new ArrayList<String>(); - for (CandidateTablePruneCause cause : causes) { - columns.addAll(cause.getMissingExpressions()); + if (causes.size() == 1) { + return new String[]{ + "Columns " + causes.iterator().next().getMissingColumns(), + "present in any table", + }; + } else { + return new String[]{ + "Column Sets: " + causes.stream().map(CandidateTablePruneCause::getMissingColumns).collect(toSet()), + "queriable together", + }; } - return new String[]{columns.toString()}; } }, // candidate table tries to get denormalized field from dimension and the // referred dimension is invalid. INVALID_DENORM_TABLE("Referred dimension is invalid in one of the candidate tables"), - // column not valid in cube table - COLUMN_NOT_VALID("Column not valid in cube table"), + + // Moved from Stoarge causes . + //The storage is removed as its not set in property "lens.cube.query.valid.fact.<fact_name>.storagetables" + INVALID_STORAGE("Invalid Storage"), + // storage table does not exist. Commented as its not being used anywhere in master. + // STOARGE_TABLE_DOES_NOT_EXIST("Storage table does not exist"), + // storage has no update periods queried. Commented as its not being used anywhere in master. 
+ // MISSING_UPDATE_PERIODS("Storage has no update periods"), + + // storage table has no partitions queried + NO_PARTITIONS("Storage table has no partitions"), + // partition column does not exist + PART_COL_DOES_NOT_EXIST("Partition column does not exist"), + // Range is not supported by this storage table + TIME_RANGE_NOT_ANSWERABLE("Range not answerable"), + STORAGE_NOT_AVAILABLE_IN_RANGE("No storages available for all of these time ranges: %s") { + @Override + Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { + return new Object[]{ + causes.stream().map(CandidateTablePruneCause::getInvalidRanges).flatMap(Collection::stream) + .collect(toSet()).toString(), + }; + } + }, + + EXPRESSION_NOT_EVALUABLE("%s expressions not evaluable") { + Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { + return new String[]{ + causes.stream().map(CandidateTablePruneCause::getMissingExpressions).flatMap(Collection::stream) + .collect(toSet()).toString(), + }; + } + }, + // column not valid in cube table. Commented the below line as it's not being used in master. 
+ //COLUMN_NOT_VALID("Column not valid in cube table"), // column not found in cube table - COLUMN_NOT_FOUND("%s are not %s") { + DENORM_COLUMN_NOT_FOUND("%s are not %s") { Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { if (causes.size() == 1) { return new String[]{ @@ -87,12 +121,8 @@ public class CandidateTablePruneCause { "present in any table", }; } else { - List<List<String>> columnSets = new ArrayList<List<String>>(); - for (CandidateTablePruneCause cause : causes) { - columnSets.add(cause.getMissingColumns()); - } return new String[]{ - "Column Sets: " + columnSets, + "Column Sets: " + causes.stream().map(CandidateTablePruneCause::getMissingColumns).collect(toSet()), "queriable together", }; } @@ -107,61 +137,54 @@ public class CandidateTablePruneCause { TIMEDIM_NOT_SUPPORTED("Queried data not available for time dimensions: %s") { @Override Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { - Set<String> dims = Sets.newHashSet(); - for(CandidateTablePruneCause cause: causes){ - dims.addAll(cause.getUnsupportedTimeDims()); - } return new Object[]{ - dims.toString(), + causes.stream().map(CandidateTablePruneCause::getUnsupportedTimeDims).flatMap(Collection::stream) + .collect(toSet()).toString(), }; } }, NO_FACT_UPDATE_PERIODS_FOR_GIVEN_RANGE("No fact update periods for given range"), + + // no candidate update periods, update period cause will have why each + // update period is not a candidate + NO_CANDIDATE_UPDATE_PERIODS("Storage update periods are not valid for given time range"), + NO_COLUMN_PART_OF_A_JOIN_PATH("No column part of a join path. 
Join columns: [%s]") { Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { - List<String> columns = new ArrayList<String>(); - for (CandidateTablePruneCause cause : causes) { - columns.addAll(cause.getJoinColumns()); - } - return new String[]{columns.toString()}; + return new String[]{ + causes.stream().map(CandidateTablePruneCause::getJoinColumns).flatMap(Collection::stream) + .collect(toSet()).toString(), + }; } }, // cube table is an aggregated fact and queried column is not under default // aggregate MISSING_DEFAULT_AGGREGATE("Columns: [%s] are missing default aggregate") { Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { - List<String> columns = new ArrayList<String>(); - for (CandidateTablePruneCause cause : causes) { - columns.addAll(cause.getColumnsMissingDefaultAggregate()); - } - return new String[]{columns.toString()}; + return new String[]{ + causes.stream().map(CandidateTablePruneCause::getColumnsMissingDefaultAggregate).flatMap(Collection::stream) + .collect(toSet()).toString(), + }; } }, // missing partitions for cube table MISSING_PARTITIONS("Missing partitions for the cube table: %s") { Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { - Set<Set<String>> missingPartitions = Sets.newHashSet(); - for (CandidateTablePruneCause cause : causes) { - missingPartitions.add(cause.getMissingPartitions()); - } - return new String[]{missingPartitions.toString()}; + return new String[]{ + causes.stream().map(CandidateTablePruneCause::getMissingPartitions).collect(toSet()).toString(), + }; } }, // incomplete data in the fact INCOMPLETE_PARTITION("Data for the requested metrics is only partially complete. Partially complete metrics are:" + " %s. 
Please try again later or rerun after removing incomplete metrics") { Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { - Set<String> incompleteMetrics = Sets.newHashSet(); - for (CandidateTablePruneCause cause : causes) { - if (cause.getIncompletePartitions() != null) { - incompleteMetrics.addAll(cause.getIncompletePartitions().keySet()); - } - } - return new String[]{incompleteMetrics.toString()}; + return new String[]{ + causes.stream().map(CandidateTablePruneCause::getIncompletePartitions).collect(toSet()).toString(), + }; } }; - String errorFormat; CandidateTablePruneCode(String format) { @@ -181,67 +204,20 @@ public class CandidateTablePruneCause { } } - public enum SkipStorageCode { - // invalid storage table - INVALID, - // storage table does not exist - TABLE_NOT_EXIST, - // storage has no update periods queried - MISSING_UPDATE_PERIODS, - // no candidate update periods, update period cause will have why each - // update period is not a candidate - NO_CANDIDATE_PERIODS, - // storage table has no partitions queried - NO_PARTITIONS, - // partition column does not exist - PART_COL_DOES_NOT_EXIST, - // Range is not supported by this storage table - RANGE_NOT_ANSWERABLE, - // storage is not supported by execution engine - UNSUPPORTED - } - public enum SkipUpdatePeriodCode { // invalid update period INVALID, - // Query max interval is more than update period - QUERY_INTERVAL_BIGGER + //this update period is greater than the Query max interval as provided by user with lens.cube.query.max.interval + UPDATE_PERIOD_BIGGER_THAN_MAX, + TIME_RANGE_NOT_ANSWERABLE_BY_UPDATE_PERIOD } - @JsonWriteNullProperties(false) - @Data - @NoArgsConstructor - public static class SkipStorageCause { - private SkipStorageCode cause; - // update period to skip cause - private Map<String, SkipUpdatePeriodCode> updatePeriodRejectionCause; - private List<String> nonExistantPartCols; - - public SkipStorageCause(SkipStorageCode cause) { - this.cause = cause; - } - - 
public static SkipStorageCause partColDoesNotExist(String... partCols) { - SkipStorageCause ret = new SkipStorageCause(SkipStorageCode.PART_COL_DOES_NOT_EXIST); - ret.nonExistantPartCols = new ArrayList<String>(); - for (String s : partCols) { - ret.nonExistantPartCols.add(s); - } - return ret; - } - - public static SkipStorageCause noCandidateUpdatePeriod(Map<String, SkipUpdatePeriodCode> causes) { - SkipStorageCause ret = new SkipStorageCause(SkipStorageCode.NO_CANDIDATE_PERIODS); - ret.updatePeriodRejectionCause = causes; - return ret; - } - } + // Used for Test cases only. + // storage to skip storage cause for dim table + private Map<String, CandidateTablePruneCode> dimStoragePruningCauses; // cause for cube table private CandidateTablePruneCode cause; - // storage to skip storage cause - private Map<String, SkipStorageCause> storageCauses; - // populated only incase of missing partitions cause private Set<String> missingPartitions; // populated only incase of incomplete partitions cause @@ -249,110 +225,129 @@ public class CandidateTablePruneCause { // populated only incase of missing update periods cause private List<String> missingUpdatePeriods; // populated in case of missing columns - private List<String> missingColumns; + private Set<String> missingColumns; // populated in case of expressions not evaluable private List<String> missingExpressions; // populated in case of no column part of a join path - private List<String> joinColumns; + private Collection<String> joinColumns; // the columns that are missing default aggregate. only set in case of MISSING_DEFAULT_AGGREGATE private List<String> columnsMissingDefaultAggregate; // if a time dim is not supported by the fact. 
Would be set if and only if // the fact is not partitioned by part col of the time dim and time dim is not a dim attribute private Set<String> unsupportedTimeDims; // time covered - private MaxCoveringFactResolver.TimeCovered maxTimeCovered; // ranges in which fact is invalid private List<TimeRange> invalidRanges; + private List<String> nonExistantPartCols; + + private Map<String, SkipUpdatePeriodCode> updatePeriodRejectionCause; + + public CandidateTablePruneCause(CandidateTablePruneCode cause) { this.cause = cause; } // Different static constructors for different causes. - public static CandidateTablePruneCause factNotAvailableInRange(List<TimeRange> ranges) { - CandidateTablePruneCause cause = new CandidateTablePruneCause(FACT_NOT_AVAILABLE_IN_RANGE); + static CandidateTablePruneCause storageNotAvailableInRange(List<TimeRange> ranges) { + CandidateTablePruneCause cause = new CandidateTablePruneCause(STORAGE_NOT_AVAILABLE_IN_RANGE); cause.invalidRanges = ranges; return cause; } - public static CandidateTablePruneCause timeDimNotSupported(Set<String> unsupportedTimeDims) { + static CandidateTablePruneCause timeDimNotSupported(Set<String> unsupportedTimeDims) { CandidateTablePruneCause cause = new CandidateTablePruneCause(TIMEDIM_NOT_SUPPORTED); cause.unsupportedTimeDims = unsupportedTimeDims; return cause; } - public static CandidateTablePruneCause columnNotFound(Collection<String>... 
missingColumns) { - List<String> colList = new ArrayList<String>(); - for (Collection<String> missing : missingColumns) { - colList.addAll(missing); - } + static CandidateTablePruneCause columnNotFound(Collection<String> missingColumns) { CandidateTablePruneCause cause = new CandidateTablePruneCause(COLUMN_NOT_FOUND); - cause.setMissingColumns(colList); + cause.setMissingColumns(Sets.newHashSet(missingColumns)); + return cause; + } + static CandidateTablePruneCause denormColumnNotFound(Collection<String> missingColumns) { + CandidateTablePruneCause cause = new CandidateTablePruneCause(DENORM_COLUMN_NOT_FOUND); + cause.setMissingColumns(Sets.newHashSet(missingColumns)); return cause; } - public static CandidateTablePruneCause columnNotFound(String... columns) { - List<String> colList = new ArrayList<String>(); - for (String column : columns) { - colList.add(column); - } - return columnNotFound(colList); + static CandidateTablePruneCause columnNotFound(String... columns) { + return columnNotFound(newArrayList(columns)); } - public static CandidateTablePruneCause expressionNotEvaluable(String... exprs) { - List<String> colList = new ArrayList<String>(); - for (String column : exprs) { - colList.add(column); - } + static CandidateTablePruneCause expressionNotEvaluable(String... 
exprs) { CandidateTablePruneCause cause = new CandidateTablePruneCause(EXPRESSION_NOT_EVALUABLE); - cause.setMissingExpressions(colList); + cause.setMissingExpressions(newArrayList(exprs)); return cause; } - public static CandidateTablePruneCause missingPartitions(Set<String> nonExistingParts) { + static CandidateTablePruneCause missingPartitions(Set<String> nonExistingParts) { CandidateTablePruneCause cause = new CandidateTablePruneCause(MISSING_PARTITIONS); cause.setMissingPartitions(nonExistingParts); return cause; } - public static CandidateTablePruneCause incompletePartitions(Map<String, Map<String, Float>> incompleteParts) { + static CandidateTablePruneCause incompletePartitions(Map<String, Map<String, Float>> incompleteParts) { CandidateTablePruneCause cause = new CandidateTablePruneCause(INCOMPLETE_PARTITION); //incompleteParts may be null when partial data is allowed. cause.setIncompletePartitions(incompleteParts); return cause; } - public static CandidateTablePruneCause lessData(MaxCoveringFactResolver.TimeCovered timeCovered) { - CandidateTablePruneCause cause = new CandidateTablePruneCause(LESS_DATA); - cause.setMaxTimeCovered(timeCovered); - return cause; - } - public static CandidateTablePruneCause noColumnPartOfAJoinPath(final Collection<String> colSet) { CandidateTablePruneCause cause = new CandidateTablePruneCause(NO_COLUMN_PART_OF_A_JOIN_PATH); - cause.setJoinColumns(new ArrayList<String>() { - { - addAll(colSet); - } - }); + cause.setJoinColumns(colSet); + return cause; + } + + static CandidateTablePruneCause missingDefaultAggregate(String... names) { + CandidateTablePruneCause cause = new CandidateTablePruneCause(MISSING_DEFAULT_AGGREGATE); + cause.setColumnsMissingDefaultAggregate(newArrayList(names)); return cause; } - public static CandidateTablePruneCause noCandidateStorages(Map<String, SkipStorageCause> storageCauses) { + /** + * This factroy menthod can be used when a Dim Table is pruned because all its Storages are pruned. 
+ * @param dimStoragePruningCauses + * @return + */ + static CandidateTablePruneCause noCandidateStoragesForDimtable( + Map<String, CandidateTablePruneCode> dimStoragePruningCauses) { CandidateTablePruneCause cause = new CandidateTablePruneCause(NO_CANDIDATE_STORAGES); - cause.setStorageCauses(new HashMap<String, SkipStorageCause>()); - for (Map.Entry<String, SkipStorageCause> entry : storageCauses.entrySet()) { + cause.setDimStoragePruningCauses(new HashMap<String, CandidateTablePruneCode>()); + for (Map.Entry<String, CandidateTablePruneCode> entry : dimStoragePruningCauses.entrySet()) { String key = entry.getKey(); key = key.substring(0, (key.indexOf("_") + key.length() + 1) % (key.length() + 1)); // extract the storage part - cause.getStorageCauses().put(key.toLowerCase(), entry.getValue()); + cause.getDimStoragePruningCauses().put(key.toLowerCase(), entry.getValue()); } return cause; } - public static CandidateTablePruneCause missingDefaultAggregate(String... names) { - CandidateTablePruneCause cause = new CandidateTablePruneCause(MISSING_DEFAULT_AGGREGATE); - cause.setColumnsMissingDefaultAggregate(Lists.newArrayList(names)); + /** + * Queried partition columns are not present in this Storage Candidate + * @param missingPartitionColumns + * @return + */ + public static CandidateTablePruneCause partitionColumnsMissing(final String... missingPartitionColumns) { + return partitionColumnsMissing(Lists.newArrayList(missingPartitionColumns)); + } + public static CandidateTablePruneCause partitionColumnsMissing(final List<String> missingPartitionColumns) { + CandidateTablePruneCause cause = new CandidateTablePruneCause(PART_COL_DOES_NOT_EXIST); + cause.nonExistantPartCols = missingPartitionColumns; + return cause; + } + + /** + * All update periods of this Stoarge Candidate are rejected. 
+ * @param updatePeriodRejectionCause + * @return + */ + static CandidateTablePruneCause updatePeriodsRejected( + final Map<String, SkipUpdatePeriodCode> updatePeriodRejectionCause) { + CandidateTablePruneCause cause = new CandidateTablePruneCause(NO_CANDIDATE_UPDATE_PERIODS); + cause.updatePeriodRejectionCause = updatePeriodRejectionCause; return cause; } } http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java index ed37bc5..6d61f1f 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java @@ -32,7 +32,6 @@ import org.apache.lens.server.api.error.LensException; import org.apache.commons.lang.StringUtils; import com.google.common.collect.Sets; - import lombok.NonNull; import lombok.extern.slf4j.Slf4j; @@ -70,7 +69,8 @@ class CandidateTableResolver implements ContextRewriter { if (cubeql.getAutoJoinCtx() != null) { // Before checking for candidate table columns, prune join paths containing non existing columns // in populated candidate tables - cubeql.getAutoJoinCtx().pruneAllPaths(cubeql.getCube(), cubeql.getCandidateFacts(), null); + cubeql.getAutoJoinCtx().pruneAllPaths(cubeql.getCube(), + CandidateUtil.getStorageCandidates(cubeql.getCandidates()), null); cubeql.getAutoJoinCtx().pruneAllPathsForCandidateDims(cubeql.getCandidateDimTables()); cubeql.getAutoJoinCtx().refreshJoinPathColumns(); } @@ -78,7 +78,6 @@ class CandidateTableResolver implements ContextRewriter { // check for joined columns and denorm columns on refered tables resolveCandidateFactTablesForJoins(cubeql); resolveCandidateDimTablesForJoinsAndDenorms(cubeql); 
- cubeql.pruneCandidateFactSet(CandidateTablePruneCode.INVALID_DENORM_TABLE); checkForQueriedColumns = true; } } @@ -91,10 +90,16 @@ class CandidateTableResolver implements ContextRewriter { cubeql.getCube().getName() + " does not have any facts"); } for (CubeFactTable fact : factTables) { - CandidateFact cfact = new CandidateFact(fact, cubeql.getCube()); - cubeql.getCandidateFacts().add(cfact); + if (fact.getUpdatePeriods().isEmpty()) { + log.info("Not considering fact: {} as it has no update periods", fact.getName()); + } else { + for (String s : fact.getStorages()) { + StorageCandidate sc = new StorageCandidate(cubeql.getCube(), fact, s, cubeql); + cubeql.getCandidates().add(sc); + } + } } - log.info("Populated candidate facts: {}", cubeql.getCandidateFacts()); + log.info("Populated storage candidates: {}", cubeql.getCandidates()); } if (cubeql.getDimensions().size() != 0) { @@ -154,10 +159,10 @@ class CandidateTableResolver implements ContextRewriter { OptionalDimCtx optdim = cubeql.getOptionalDimensionMap().remove(dim); // remove all the depending candidate table as well for (CandidateTable candidate : optdim.requiredForCandidates) { - if (candidate instanceof CandidateFact) { - log.info("Not considering fact:{} as refered table does not have any valid dimtables", candidate); - cubeql.getCandidateFacts().remove(candidate); - cubeql.addFactPruningMsgs(((CandidateFact) candidate).fact, new CandidateTablePruneCause( + if (candidate instanceof StorageCandidate) { + log.info("Not considering storage candidate:{} as refered table does not have any valid dimtables", candidate); + cubeql.getCandidates().remove(candidate); + cubeql.addStoragePruningMsg(((StorageCandidate) candidate), new CandidateTablePruneCause( CandidateTablePruneCode.INVALID_DENORM_TABLE)); } else { log.info("Not considering dimtable:{} as refered table does not have any valid dimtables", candidate); @@ -172,20 +177,20 @@ class CandidateTableResolver implements ContextRewriter { } } - public static 
boolean isColumnAvailableInRange(final TimeRange range, Date startTime, Date endTime) { + private static boolean isColumnAvailableInRange(final TimeRange range, Date startTime, Date endTime) { return (isColumnAvailableFrom(range.getFromDate(), startTime) && isColumnAvailableTill(range.getToDate(), endTime)); } - public static boolean isColumnAvailableFrom(@NonNull final Date date, Date startTime) { + private static boolean isColumnAvailableFrom(@NonNull final Date date, Date startTime) { return (startTime == null) ? true : date.equals(startTime) || date.after(startTime); } - public static boolean isColumnAvailableTill(@NonNull final Date date, Date endTime) { + private static boolean isColumnAvailableTill(@NonNull final Date date, Date endTime) { return (endTime == null) ? true : date.equals(endTime) || date.before(endTime); } - public static boolean isFactColumnValidForRange(CubeQueryContext cubeql, CandidateTable cfact, String col) { + private static boolean isFactColumnValidForRange(CubeQueryContext cubeql, CandidateTable cfact, String col) { for(TimeRange range : cubeql.getTimeRanges()) { if (!isColumnAvailableInRange(range, getFactColumnStartTime(cfact, col), getFactColumnEndTime(cfact, col))) { return false; @@ -194,14 +199,14 @@ class CandidateTableResolver implements ContextRewriter { return true; } - public static Date getFactColumnStartTime(CandidateTable table, String factCol) { + private static Date getFactColumnStartTime(CandidateTable table, String factCol) { Date startTime = null; - if (table instanceof CandidateFact) { - for (String key : ((CandidateFact) table).fact.getProperties().keySet()) { + if (table instanceof StorageCandidate) { + for (String key : ((StorageCandidate) table).getFact().getProperties().keySet()) { if (key.contains(MetastoreConstants.FACT_COL_START_TIME_PFX)) { String propCol = StringUtils.substringAfter(key, MetastoreConstants.FACT_COL_START_TIME_PFX); if (factCol.equals(propCol)) { - startTime = ((CandidateFact) 
table).fact.getDateFromProperty(key, false, true); + startTime = ((StorageCandidate) table).getFact().getDateFromProperty(key, false, true); } } } @@ -209,14 +214,14 @@ class CandidateTableResolver implements ContextRewriter { return startTime; } - public static Date getFactColumnEndTime(CandidateTable table, String factCol) { + private static Date getFactColumnEndTime(CandidateTable table, String factCol) { Date endTime = null; - if (table instanceof CandidateFact) { - for (String key : ((CandidateFact) table).fact.getProperties().keySet()) { + if (table instanceof StorageCandidate) { + for (String key : ((StorageCandidate) table).getFact().getProperties().keySet()) { if (key.contains(MetastoreConstants.FACT_COL_END_TIME_PFX)) { String propCol = StringUtils.substringAfter(key, MetastoreConstants.FACT_COL_END_TIME_PFX); if (factCol.equals(propCol)) { - endTime = ((CandidateFact) table).fact.getDateFromProperty(key, false, true); + endTime = ((StorageCandidate) table).getFact().getDateFromProperty(key, false, true); } } } @@ -228,7 +233,7 @@ class CandidateTableResolver implements ContextRewriter { if (cubeql.getCube() != null) { String str = cubeql.getConf().get(CubeQueryConfUtil.getValidFactTablesKey(cubeql.getCube().getName())); List<String> validFactTables = - StringUtils.isBlank(str) ? null : Arrays.asList(StringUtils.split(str.toLowerCase(), ",")); + StringUtils.isBlank(str) ? null : Arrays.asList(StringUtils.split(str.toLowerCase(), ",")); Set<QueriedPhraseContext> queriedMsrs = new HashSet<>(); Set<QueriedPhraseContext> dimExprs = new HashSet<>(); @@ -239,101 +244,79 @@ class CandidateTableResolver implements ContextRewriter { dimExprs.add(qur); } } - // Remove fact tables based on whether they are valid or not. 
- for (Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator(); i.hasNext();) { - CandidateFact cfact = i.next(); - - if (validFactTables != null) { - if (!validFactTables.contains(cfact.getName().toLowerCase())) { - log.info("Not considering fact table:{} as it is not a valid fact", cfact); - cubeql - .addFactPruningMsgs(cfact.fact, new CandidateTablePruneCause(CandidateTablePruneCode.INVALID)); - i.remove(); - continue; + // Remove storage candidates based on whether they are valid or not. + for (Iterator<Candidate> i = cubeql.getCandidates().iterator(); i.hasNext();) { + Candidate cand = i.next(); + if (cand instanceof StorageCandidate) { + StorageCandidate sc = (StorageCandidate) cand; + if (validFactTables != null) { + if (!validFactTables.contains(sc.getFact().getName().toLowerCase())) { + log.info("Not considering storage candidate:{} as it is not a valid candidate", sc); + cubeql.addStoragePruningMsg(sc, new CandidateTablePruneCause(CandidateTablePruneCode.INVALID)); + i.remove(); + continue; + } } - } - - // update expression evaluability for this fact - for (String expr : cubeql.getQueriedExprs()) { - cubeql.getExprCtx().updateEvaluables(expr, cfact); - } - // go over the columns accessed in the query and find out which tables - // can answer the query - // the candidate facts should have all the dimensions queried and - // atleast - // one measure - boolean toRemove = false; - for (QueriedPhraseContext qur : dimExprs) { - if (!qur.isEvaluable(cubeql, cfact)) { - log.info("Not considering fact table:{} as columns {} are not available", cfact, qur.getColumns()); - cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.columnNotFound(qur.getColumns())); - toRemove = true; - break; + // update expression evaluability for this fact + for (String expr : cubeql.getQueriedExprs()) { + cubeql.getExprCtx().updateEvaluables(expr, sc); } - } - // check if the candidate fact has atleast one measure queried - // if expression has measures, they 
should be considered along with other measures and see if the fact can be - // part of measure covering set - if (!checkForFactColumnExistsAndValidForRange(cfact, queriedMsrs, cubeql)) { - Set<String> columns = getColumns(queriedMsrs); - - log.info("Not considering fact table:{} as columns {} is not available", cfact, columns); - cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.columnNotFound(columns)); - toRemove = true; - } - // go over join chains and prune facts that dont have any of the columns in each chain - for (JoinChain chain : cubeql.getJoinchains().values()) { - OptionalDimCtx optdim = cubeql.getOptionalDimensionMap().get(Aliased.create((Dimension)cubeql.getCubeTbls() - .get(chain.getName()), chain.getName())); - if (!checkForFactColumnExistsAndValidForRange(cfact, chain.getSourceColumns(), cubeql)) { - // check if chain is optional or not - if (optdim == null) { - log.info("Not considering fact table:{} as columns {} are not available", cfact, - chain.getSourceColumns()); - cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.columnNotFound(chain.getSourceColumns())); + // go over the columns accessed in the query and find out which tables + // can answer the query + // the candidate facts should have all the dimensions queried and + // atleast + // one measure + boolean toRemove = false; + for (QueriedPhraseContext qur : dimExprs) { + if (!qur.isEvaluable(cubeql, sc)) { + log.info("Not considering storage candidate:{} as columns {} are not available", sc, qur.getColumns()); + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.columnNotFound( + qur.getColumns())); toRemove = true; break; } } - } - if (toRemove) { - i.remove(); + // check if the candidate fact has atleast one measure queried + // if expression has measures, they should be considered along with other measures and see if the fact can be + // part of measure covering set + if (!checkForFactColumnExistsAndValidForRange(sc, queriedMsrs, cubeql)) { + Set<String> 
columns = getColumns(queriedMsrs); + log.info("Not considering storage candidate:{} as columns {} is not available", sc, columns); + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.columnNotFound( + columns)); + toRemove = true; + } + + // go over join chains and prune facts that dont have any of the columns in each chain + for (JoinChain chain : cubeql.getJoinchains().values()) { + OptionalDimCtx optdim = cubeql.getOptionalDimensionMap().get(Aliased.create((Dimension) cubeql.getCubeTbls() + .get(chain.getName()), chain.getName())); + if (!checkForFactColumnExistsAndValidForRange(sc, chain.getSourceColumns(), cubeql)) { + // check if chain is optional or not + if (optdim == null) { + log.info("Not considering storage candidate:{} as columns {} are not available", sc, + chain.getSourceColumns()); + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.columnNotFound( + chain.getSourceColumns())); + toRemove = true; + break; + } + } + } + + if (toRemove) { + i.remove(); + } + } else { + throw new LensException("Not a storage candidate!!"); } } - if (cubeql.getCandidateFacts().size() == 0) { + if (cubeql.getCandidates().size() == 0) { throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), - getColumns(cubeql.getQueriedPhrases()).toString()); - } - Set<Set<CandidateFact>> cfactset; - if (queriedMsrs.isEmpty()) { - // if no measures are queried, add all facts individually as single covering sets - cfactset = new HashSet<>(); - for (CandidateFact cfact : cubeql.getCandidateFacts()) { - Set<CandidateFact> one = new LinkedHashSet<>(); - one.add(cfact); - cfactset.add(one); - } - cubeql.getCandidateFactSets().addAll(cfactset); - } else { - // Find out candidate fact table sets which contain all the measures - // queried - - List<CandidateFact> cfacts = new ArrayList<>(cubeql.getCandidateFacts()); - cfactset = findCoveringSets(cubeql, cfacts, queriedMsrs); - log.info("Measure covering fact sets :{}", cfactset); - String msrString = 
getColumns(queriedMsrs).toString(); - if (cfactset.isEmpty()) { - throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), msrString); - } - cubeql.getCandidateFactSets().addAll(cfactset); - cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCause.columnNotFound(getColumns(queriedMsrs))); - - if (cubeql.getCandidateFacts().size() == 0) { - throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), msrString); - } + getColumns(cubeql.getQueriedPhrases()).toString()); } } } @@ -345,51 +328,6 @@ class CandidateTableResolver implements ContextRewriter { } return cols; } - static Set<Set<CandidateFact>> findCoveringSets(CubeQueryContext cubeql, List<CandidateFact> cfactsPassed, - Set<QueriedPhraseContext> msrs) throws LensException { - Set<Set<CandidateFact>> cfactset = new HashSet<>(); - List<CandidateFact> cfacts = new ArrayList<>(cfactsPassed); - for (Iterator<CandidateFact> i = cfacts.iterator(); i.hasNext();) { - CandidateFact cfact = i.next(); - if (!checkForFactColumnExistsAndValidForRange(cfact, msrs, cubeql)) { - // cfact does not contain any of msrs and none of exprsWithMeasures are evaluable. - // ignore the fact - i.remove(); - continue; - } else if (allEvaluable(cfact, msrs, cubeql)) { - // return single set - Set<CandidateFact> one = new LinkedHashSet<>(); - one.add(cfact); - cfactset.add(one); - i.remove(); - } - } - // facts that contain all measures or no measures are removed from iteration. 
- // find other facts - for (Iterator<CandidateFact> i = cfacts.iterator(); i.hasNext();) { - CandidateFact cfact = i.next(); - i.remove(); - // find the remaining measures in other facts - if (i.hasNext()) { - Set<QueriedPhraseContext> remainingMsrs = new HashSet<>(msrs); - Set<QueriedPhraseContext> coveredMsrs = coveredMeasures(cfact, msrs, cubeql); - remainingMsrs.removeAll(coveredMsrs); - - Set<Set<CandidateFact>> coveringSets = findCoveringSets(cubeql, cfacts, remainingMsrs); - if (!coveringSets.isEmpty()) { - for (Set<CandidateFact> set : coveringSets) { - set.add(cfact); - cfactset.add(set); - } - } else { - log.info("Couldnt find any set containing remaining measures:{} {} in {}", remainingMsrs, - cfactsPassed); - } - } - } - log.info("Covering set {} for measures {} with factsPassed {}", cfactset, msrs, cfactsPassed); - return cfactset; - } private void resolveCandidateDimTablesForJoinsAndDenorms(CubeQueryContext cubeql) throws LensException { if (cubeql.getAutoJoinCtx() == null) { @@ -484,11 +422,10 @@ class CandidateTableResolver implements ContextRewriter { return; } Collection<String> colSet = null; - if (cubeql.getCube() != null && !cubeql.getCandidateFacts().isEmpty()) { - for (Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator(); i.hasNext();) { - CandidateFact cfact = i.next(); - CubeFactTable fact = cfact.fact; - + if (cubeql.getCube() != null && !cubeql.getCandidates().isEmpty()) { + for (Iterator<StorageCandidate> i = + CandidateUtil.getStorageCandidates(cubeql.getCandidates()).iterator(); i.hasNext();) { + StorageCandidate sc = i.next(); // for each join path check for columns involved in path for (Map.Entry<Aliased<Dimension>, Map<AbstractCubeTable, List<String>>> joincolumnsEntry : cubeql .getAutoJoinCtx() @@ -497,19 +434,19 @@ class CandidateTableResolver implements ContextRewriter { OptionalDimCtx optdim = cubeql.getOptionalDimensionMap().get(reachableDim); colSet = joincolumnsEntry.getValue().get(cubeql.getCube()); - if 
(!checkForFactColumnExistsAndValidForRange(cfact, colSet, cubeql)) { + if (!checkForFactColumnExistsAndValidForRange(sc, colSet, cubeql)) { if (optdim == null || optdim.isRequiredInJoinChain - || (optdim != null && optdim.requiredForCandidates.contains(cfact))) { + || (optdim != null && optdim.requiredForCandidates.contains(sc))) { i.remove(); - log.info("Not considering fact table:{} as it does not have columns in any of the join paths." - + " Join columns:{}", fact, colSet); - cubeql.addFactPruningMsgs(fact, CandidateTablePruneCause.noColumnPartOfAJoinPath(colSet)); + log.info("Not considering storage candidate :{} as it does not have columns in any of the join paths." + + " Join columns:{}", sc, colSet); + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.noColumnPartOfAJoinPath(colSet)); break; } } } } - if (cubeql.getCandidateFacts().size() == 0) { + if (cubeql.getCandidates().size() == 0) { throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), colSet == null ? 
"NULL" : colSet.toString()); } @@ -586,12 +523,16 @@ class CandidateTableResolver implements ContextRewriter { if (removedCandidates.get(dim) != null) { for (CandidateTable candidate : removedCandidates.get(dim)) { if (!candidatesReachableThroughRefs.contains(candidate)) { - if (candidate instanceof CandidateFact) { - if (cubeql.getCandidateFacts().contains(candidate)) { - log.info("Not considering fact:{} as its required optional dims are not reachable", candidate); - cubeql.getCandidateFacts().remove(candidate); - cubeql.addFactPruningMsgs(((CandidateFact) candidate).fact, - CandidateTablePruneCause.columnNotFound(col)); + if (candidate instanceof StorageCandidate) { + if (cubeql.getCandidates().contains(candidate)) { + log.info("Not considering Storage:{} as its required optional dims are not reachable", candidate); + cubeql.getCandidates().remove(candidate); + cubeql.addStoragePruningMsg((StorageCandidate) candidate, + CandidateTablePruneCause.columnNotFound(col)); + Collection<Candidate> prunedCandidates = CandidateUtil. 
+ filterCandidates(cubeql.getCandidates(), (StorageCandidate) candidate); + cubeql.addCandidatePruningMsg(prunedCandidates, + new CandidateTablePruneCause(CandidateTablePruneCode.ELEMENT_IN_SET_PRUNED)); } } else if (cubeql.getCandidateDimTables().containsKey(((CandidateDim) candidate).getBaseTable())) { log.info("Not considering dimtable:{} as its required optional dims are not reachable", candidate); @@ -639,11 +580,11 @@ class CandidateTableResolver implements ContextRewriter { // candidate has other evaluable expressions continue; } - if (candidate instanceof CandidateFact) { - if (cubeql.getCandidateFacts().contains(candidate)) { + if (candidate instanceof StorageCandidate) { + if (cubeql.getCandidates().contains(candidate)) { log.info("Not considering fact:{} as is not reachable through any optional dim", candidate); - cubeql.getCandidateFacts().remove(candidate); - cubeql.addFactPruningMsgs(((CandidateFact) candidate).fact, + cubeql.getCandidates().remove(candidate); + cubeql.addStoragePruningMsg(((StorageCandidate) candidate), CandidateTablePruneCause.expressionNotEvaluable(col.getExprCol())); } } else if (cubeql.getCandidateDimTables().containsKey(((CandidateDim) candidate).getBaseTable())) { @@ -697,7 +638,8 @@ class CandidateTableResolver implements ContextRewriter { // check if it available as reference, if not remove the // candidate log.info("Not considering dimtable: {} as column {} is not available", cdim, col); - cubeql.addDimPruningMsgs(dim, cdim.getTable(), CandidateTablePruneCause.columnNotFound(col)); + cubeql.addDimPruningMsgs(dim, cdim.getTable(), CandidateTablePruneCause.columnNotFound( + col)); i.remove(); break; } @@ -716,7 +658,7 @@ class CandidateTableResolver implements ContextRewriter { // The candidate table contains atleast one column in the colSet and // column can the queried in the range specified - static boolean checkForFactColumnExistsAndValidForRange(CandidateTable table, Collection<String> colSet, + private static boolean 
checkForFactColumnExistsAndValidForRange(CandidateTable table, Collection<String> colSet, CubeQueryContext cubeql) { if (colSet == null || colSet.isEmpty()) { return true; @@ -729,40 +671,18 @@ class CandidateTableResolver implements ContextRewriter { return false; } - static boolean checkForFactColumnExistsAndValidForRange(CandidateFact table, Collection<QueriedPhraseContext> colSet, - CubeQueryContext cubeql) throws LensException { + + private static boolean checkForFactColumnExistsAndValidForRange(StorageCandidate sc, + Collection<QueriedPhraseContext> colSet, + CubeQueryContext cubeql) throws LensException { if (colSet == null || colSet.isEmpty()) { return true; } for (QueriedPhraseContext qur : colSet) { - if (qur.isEvaluable(cubeql, table)) { + if (qur.isEvaluable(cubeql, sc)) { return true; } } return false; } - - static boolean allEvaluable(CandidateFact table, Collection<QueriedPhraseContext> colSet, - CubeQueryContext cubeql) throws LensException { - if (colSet == null || colSet.isEmpty()) { - return true; - } - for (QueriedPhraseContext qur : colSet) { - if (!qur.isEvaluable(cubeql, table)) { - return false; - } - } - return true; - } - - static Set<QueriedPhraseContext> coveredMeasures(CandidateFact table, Collection<QueriedPhraseContext> msrs, - CubeQueryContext cubeql) throws LensException { - Set<QueriedPhraseContext> coveringSet = new HashSet<>(); - for (QueriedPhraseContext msr : msrs) { - if (msr.isEvaluable(cubeql, table)) { - coveringSet.add(msr); - } - } - return coveringSet; - } } http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateUtil.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateUtil.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateUtil.java new file mode 100644 index 0000000..b9ff0ef --- /dev/null +++ 
b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateUtil.java @@ -0,0 +1,319 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lens.cube.parse; + +import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier; + +import java.util.*; + +import org.apache.lens.cube.metadata.*; +import org.apache.lens.server.api.error.LensException; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.HiveParser; + +import org.antlr.runtime.CommonToken; + +import com.google.common.collect.BoundType; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.google.common.collect.TreeRangeSet; + +/** + * Placeholder for Util methods that will be required for {@link Candidate} + */ +public final class CandidateUtil { + + private CandidateUtil() { + // Added due to checkstyle error getting below : + // (design) HideUtilityClassConstructor: Utility classes should not have a public or default constructor. 
+ } + + /** + * Returns true if the Candidate is valid for all the timeranges based on its start and end times. + * @param candidate + * @param timeRanges + * @return + */ + public static boolean isValidForTimeRanges(Candidate candidate, List<TimeRange> timeRanges) { + for (TimeRange timeRange : timeRanges) { + if (!(timeRange.getFromDate().after(candidate.getStartTime()) + && timeRange.getToDate().before(candidate.getEndTime()))) { + return false; + } + } + return true; + } + + static boolean isCandidatePartiallyValidForTimeRange(Date candidateStartTime, Date candidateEndTime, + Date timeRangeStart, Date timeRangeEnd) { + Date start = candidateStartTime.after(timeRangeStart) ? candidateStartTime : timeRangeStart; + Date end = candidateEndTime.before(timeRangeEnd) ? candidateEndTime : timeRangeEnd; + if (end.after(start)) { + return true; + } + return false; + } + + + static boolean isPartiallyValidForTimeRange(Candidate cand, TimeRange timeRange) { + return isPartiallyValidForTimeRanges(cand, Arrays.asList(timeRange)); + } + + static boolean isPartiallyValidForTimeRanges(Candidate cand, List<TimeRange> timeRanges) { + return timeRanges.stream().anyMatch(timeRange -> + isCandidatePartiallyValidForTimeRange(cand.getStartTime(), cand.getEndTime(), + timeRange.getFromDate(), timeRange.getToDate())); + } + + /** + * Copy Query AST from sourceAst to targetAst + * + * @param sourceAst + * @param targetAst + * @throws LensException + */ + static void copyASTs(QueryAST sourceAst, QueryAST targetAst) throws LensException { + + targetAst.setSelectAST(MetastoreUtil.copyAST(sourceAst.getSelectAST())); + targetAst.setWhereAST(MetastoreUtil.copyAST(sourceAst.getWhereAST())); + if (sourceAst.getJoinAST() != null) { + targetAst.setJoinAST(MetastoreUtil.copyAST(sourceAst.getJoinAST())); + } + if (sourceAst.getGroupByAST() != null) { + targetAst.setGroupByAST(MetastoreUtil.copyAST(sourceAst.getGroupByAST())); + } + if (sourceAst.getHavingAST() != null) { + 
targetAst.setHavingAST(MetastoreUtil.copyAST(sourceAst.getHavingAST())); + } + if (sourceAst.getOrderByAST() != null) { + targetAst.setOrderByAST(MetastoreUtil.copyAST(sourceAst.getOrderByAST())); + } + + targetAst.setLimitValue(sourceAst.getLimitValue()); + targetAst.setFromString(sourceAst.getFromString()); + targetAst.setWhereString(sourceAst.getWhereString()); + } + + public static Set<StorageCandidate> getStorageCandidates(final Candidate candidate) { + return getStorageCandidates(new HashSet<Candidate>(1) {{ add(candidate); }}); + } + + // this function should only be used for union candidates and never for join candidates. + // future scope of improvement: move the data model to use polymorphism + static Set<QueriedPhraseContext> coveredMeasures(Candidate candSet, Collection<QueriedPhraseContext> msrs, + CubeQueryContext cubeql) throws LensException { + Set<QueriedPhraseContext> coveringSet = new HashSet<>(); + for (QueriedPhraseContext msr : msrs) { + if (candSet.getChildren() == null) { + if (msr.isEvaluable(cubeql, (StorageCandidate) candSet)) { + coveringSet.add(msr); + } + } else { + boolean allCanAnswer = true; + for (Candidate cand : candSet.getChildren()) { + if (!msr.isEvaluable(cubeql, (StorageCandidate) cand)) { + allCanAnswer = false; + break; + } + } + if (allCanAnswer) { + coveringSet.add(msr); + } + } + } + return coveringSet; + } + + /** + * Returns true if the Candidates cover the entire time range. 
+ * @param candidates + * @param startTime + * @param endTime + * @return + */ + public static boolean isTimeRangeCovered(Collection<Candidate> candidates, Date startTime, Date endTime) { + RangeSet<Date> set = TreeRangeSet.create(); + for (Candidate candidate : candidates) { + set.add(Range.range(candidate.getStartTime(), BoundType.CLOSED, candidate.getEndTime(), BoundType.OPEN)); + } + return set.encloses(Range.range(startTime, BoundType.CLOSED, endTime, BoundType.OPEN)); + } + + public static Set<String> getColumns(Collection<QueriedPhraseContext> queriedPhraseContexts) { + Set<String> cols = new HashSet<>(); + for (QueriedPhraseContext qur : queriedPhraseContexts) { + cols.addAll(qur.getColumns()); + } + return cols; + } + + /** + * Removes every Candidate that contains the filterCandidate from candidates and returns the removed ones. + * @param candidates collection to prune; matching elements are removed through its iterator + * @param filterCandidate candidate passed to each element's contains check + * @return pruned Candidates + */ + public static Collection<Candidate> filterCandidates(Collection<Candidate> candidates, Candidate filterCandidate) { + List<Candidate> prunedCandidates = new ArrayList<>(); + Iterator<Candidate> itr = candidates.iterator(); + while (itr.hasNext()) { + Candidate current = itr.next(); // single next() per element: test and record the same candidate + if (current.contains(filterCandidate)) { + prunedCandidates.add(current); + itr.remove(); + } + } + return prunedCandidates; + } + + /** + * Gets all the Storage Candidates that participate in the collection of passed candidates + * + * @param candidates + * @return + */ + public static Set<StorageCandidate> getStorageCandidates(Collection<Candidate> candidates) { + Set<StorageCandidate> storageCandidateSet = new HashSet<>(); + getStorageCandidates(candidates, storageCandidateSet); + return storageCandidateSet; + } + + private static void getStorageCandidates(Collection<Candidate> candidates, + Set<StorageCandidate> storageCandidateSet) { + for (Candidate candidate : candidates) { + if (candidate.getChildren() == null) { + //Expecting this to be a StorageCandidate as it has no children. 
+ storageCandidateSet.add((StorageCandidate)candidate); + } else { + getStorageCandidates(candidate.getChildren(), storageCandidateSet); + } + } + } + + public static StorageCandidate cloneStorageCandidate(StorageCandidate sc) throws LensException{ + return new StorageCandidate(sc); + } + + public static boolean factHasColumn(CubeFactTable fact, String column) { + for (FieldSchema factField : fact.getColumns()) { + if (factField.getName().equals(column)) { + return true; + } + } + return false; + } + + public static String getTimeRangeWhereClasue(TimeRangeWriter rangeWriter, + StorageCandidate sc, TimeRange range) throws LensException { + String rangeWhere = rangeWriter.getTimeRangeWhereClause(sc.getCubeql(), + sc.getCubeql().getAliasForTableName(sc.getCube().getName()), + sc.getRangeToPartitions().get(range)); + if (sc.getRangeToExtraWhereFallBack().containsKey(range)) { + rangeWhere = "((" + rangeWhere + ") and (" + sc.getRangeToExtraWhereFallBack().get(range) + "))"; + } + return rangeWhere; + } + + public static class ChildrenSizeBasedCandidateComparator<T> implements Comparator<Candidate> { + @Override + public int compare(Candidate o1, Candidate o2) { + return o1.getChildren().size() - o2.getChildren().size(); + } + } + + private static final String BASE_QUERY_FORMAT = "SELECT %s FROM %s"; + + public static String buildHQLString(String select, String from, String where, + String groupby, String orderby, String having, Integer limit) { + List<String> qstrs = new ArrayList<String>(); + qstrs.add(select); + qstrs.add(from); + if (!StringUtils.isBlank(where)) { + qstrs.add(where); + } + if (!StringUtils.isBlank(groupby)) { + qstrs.add(groupby); + } + if (!StringUtils.isBlank(having)) { + qstrs.add(having); + } + if (!StringUtils.isBlank(orderby)) { + qstrs.add(orderby); + } + if (limit != null) { + qstrs.add(String.valueOf(limit)); + } + + StringBuilder queryFormat = new StringBuilder(); + queryFormat.append(BASE_QUERY_FORMAT); + if (!StringUtils.isBlank(where)) 
{ + queryFormat.append(" WHERE %s"); + } + if (!StringUtils.isBlank(groupby)) { + queryFormat.append(" GROUP BY %s"); + } + if (!StringUtils.isBlank(having)) { + queryFormat.append(" HAVING %s"); + } + if (!StringUtils.isBlank(orderby)) { + queryFormat.append(" ORDER BY %s"); + } + if (limit != null) { + queryFormat.append(" LIMIT %s"); + } + return String.format(queryFormat.toString(), qstrs.toArray(new String[qstrs.size()])); + } + + /** + * + * @param selectAST Outer query selectAST + * @param cubeql Cubequery Context + * + * Update the final alias in the outer select expressions + * 1. Replace queriedAlias with finalAlias if both are not same + * 2. If queriedAlias is missing add finalAlias as alias + */ + public static void updateFinalAlias(ASTNode selectAST, CubeQueryContext cubeql) { + for (int i = 0; i < selectAST.getChildCount(); i++) { + ASTNode selectExpr = (ASTNode) selectAST.getChild(i); + ASTNode aliasNode = HQLParser.findNodeByPath(selectExpr, Identifier); + String finalAlias = cubeql.getSelectPhrases().get(i).getFinalAlias().replaceAll("`", ""); + if (aliasNode != null) { + String queryAlias = aliasNode.getText(); + if (!queryAlias.equals(finalAlias)) { + // replace the alias node + ASTNode newAliasNode = new ASTNode(new CommonToken(HiveParser.Identifier, finalAlias)); + selectAST.getChild(i).replaceChildren(selectExpr.getChildCount() - 1, + selectExpr.getChildCount() - 1, newAliasNode); + } + } else { + // add column alias + ASTNode newAliasNode = new ASTNode(new CommonToken(HiveParser.Identifier, finalAlias)); + selectAST.getChild(i).addChild(newAliasNode); + } + } + } + + + + +} http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/CheckTableNames.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CheckTableNames.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CheckTableNames.java index 
8586262..df35a42 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CheckTableNames.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CheckTableNames.java @@ -30,7 +30,6 @@ public class CheckTableNames extends ValidationRule { @Override public boolean validate(CubeQueryContext ctx) throws LensException { - // TODO return true; } http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnLifetimeChecker.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnLifetimeChecker.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnLifetimeChecker.java new file mode 100644 index 0000000..c3d12a4 --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnLifetimeChecker.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.lens.cube.parse; + +import static org.apache.hadoop.hive.ql.parse.HiveParser.*; + +import java.util.*; + +import org.apache.lens.cube.error.ColUnAvailableInTimeRange; +import org.apache.lens.cube.error.ColUnAvailableInTimeRangeException; +import org.apache.lens.cube.error.LensCubeErrorCode; +import org.apache.lens.cube.metadata.*; +import org.apache.lens.cube.metadata.join.JoinPath; +import org.apache.lens.cube.parse.join.AutoJoinContext; +import org.apache.lens.server.api.error.LensException; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class ColumnLifetimeChecker implements ContextRewriter { + @Override + public void rewriteContext(CubeQueryContext cubeql) throws LensException { + if (cubeql.getCube() == null) { + return; + } + doColLifeValidation(cubeql); + } + + private void doColLifeValidation(CubeQueryContext cubeql) throws LensException, + ColUnAvailableInTimeRangeException { + Set<String> cubeColumns = cubeql.getColumnsQueriedForTable(cubeql.getCube().getName()); + if (cubeColumns == null || cubeColumns.isEmpty()) { + // Query doesn't have any columns from cube + return; + } + + for (String col : cubeql.getColumnsQueriedForTable(cubeql.getCube().getName())) { + CubeColumn column = cubeql.getCube().getColumnByName(col); + for (TimeRange range : cubeql.getTimeRanges()) { + if (column == null) { + if (!cubeql.getCube().getTimedDimensions().contains(col)) { + throw new LensException(LensCubeErrorCode.NOT_A_CUBE_COLUMN.getLensErrorInfo(), col); + } + continue; + } + if (!column.isColumnAvailableInTimeRange(range)) { + throwException(column); + } + } + } + + // Remove join paths that have columns with invalid life span + AutoJoinContext joinContext = cubeql.getAutoJoinCtx(); + if (joinContext == null) { + return; + } + // Get cube columns which are part of join chain + Set<String> joinColumns = joinContext.getAllJoinPathColumnsOfTable((AbstractCubeTable) cubeql.getCube()); + if (joinColumns == null || joinColumns.isEmpty()) { + 
return; + } + + // Loop over all cube columns part of join paths + for (String col : joinColumns) { + CubeColumn column = cubeql.getCube().getColumnByName(col); + for (TimeRange range : cubeql.getTimeRanges()) { + if (!column.isColumnAvailableInTimeRange(range)) { + log.info("Timerange queried is not in column life for {}, Removing join paths containing the column", column); + // Remove join paths containing this column + Map<Aliased<Dimension>, List<JoinPath>> allPaths = joinContext.getAllPaths(); + + for (Aliased<Dimension> dimension : allPaths.keySet()) { + List<JoinPath> joinPaths = allPaths.get(dimension); + Iterator<JoinPath> joinPathIterator = joinPaths.iterator(); + + while (joinPathIterator.hasNext()) { + JoinPath path = joinPathIterator.next(); + if (path.containsColumnOfTable(col, (AbstractCubeTable) cubeql.getCube())) { + log.info("Removing join path: {} as columns :{} is not available in the range", path, col); + joinPathIterator.remove(); + if (joinPaths.isEmpty()) { + // This dimension doesn't have any paths left + throw new LensException(LensCubeErrorCode.NO_JOIN_PATH.getLensErrorInfo(), + "No valid join path available for dimension " + dimension + " which would satisfy time range " + + range.getFromDate() + "-" + range.getToDate()); + } + } + } // End loop to remove path + + } // End loop for all paths + } + } // End time range loop + } // End column loop + } + + private void throwException(CubeColumn column) throws ColUnAvailableInTimeRangeException { + + final Long availabilityStartTime = (column.getStartTimeMillisSinceEpoch().isPresent()) + ? column.getStartTimeMillisSinceEpoch().get() : null; + + final Long availabilityEndTime = column.getEndTimeMillisSinceEpoch().isPresent() + ? 
column.getEndTimeMillisSinceEpoch().get() : null; + + ColUnAvailableInTimeRange col = new ColUnAvailableInTimeRange(column.getName(), availabilityStartTime, + availabilityEndTime); + + throw new ColUnAvailableInTimeRangeException(col); + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java index 4d8910a..8b47f86 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java @@ -290,7 +290,7 @@ class ColumnResolver implements ContextRewriter { return Optional.fromNullable(funcName); } - private static void addColumnsForSelectExpr(final TrackQueriedColumns sel, ASTNode node, ASTNode parent, + static void addColumnsForSelectExpr(final TrackQueriedColumns sel, ASTNode node, ASTNode parent, Set<String> cols) { if (node.getToken().getType() == TOK_TABLE_OR_COL && (parent != null && parent.getToken().getType() != DOT)) { // Take child ident.totext
