LENS-719: Allow fact start time to be specified for a storage and an update period
Project: http://git-wip-us.apache.org/repos/asf/lens/repo Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/3ed191ac Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/3ed191ac Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/3ed191ac Branch: refs/heads/LENS-581 Commit: 3ed191aca6c18be7c53afefdea28a6d08ae1fd07 Parents: 10dcebb Author: Rajat Khandelwal <[email protected]> Authored: Mon Nov 16 11:41:05 2015 +0530 Committer: Rajat Khandelwal <[email protected]> Committed: Mon Nov 16 11:41:05 2015 +0530 ---------------------------------------------------------------------- lens-api/src/main/resources/cube-0.1.xsd | 9 ++++ .../lens/cube/metadata/MetastoreUtil.java | 6 +++ .../cube/parse/CandidateTablePruneCause.java | 2 + .../org/apache/lens/cube/parse/DateUtil.java | 4 +- .../lens/cube/parse/StorageTableResolver.java | 51 +++++++++++++++++--- .../apache/lens/cube/parse/CubeTestSetup.java | 13 +++-- .../lens/cube/parse/TestCubeRewriter.java | 5 +- 7 files changed, 76 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-api/src/main/resources/cube-0.1.xsd ---------------------------------------------------------------------- diff --git a/lens-api/src/main/resources/cube-0.1.xsd b/lens-api/src/main/resources/cube-0.1.xsd index 5d7630d..4092133 100644 --- a/lens-api/src/main/resources/cube-0.1.xsd +++ b/lens-api/src/main/resources/cube-0.1.xsd @@ -828,6 +828,15 @@ The following properties can be specified for Elastic search tables : 1. lens.metastore.es.index.name : The underlying ES index name. 2. lens.metastore.es.type.name : The underlying ES type name. + Start and End times for storage table: + 1. cube.storagetable.start.times: Comma separated list of start times for this table. + Start times can be relative times(e.g. now.day - 1 month) or absolute times(e.g. 2014-02) + The max of the start times will be considered as the final start time. 
This storagetable will + not be candidate for answering time ranges completely before its start time. + 2. cube.storagetable.end.times: Comma separated list of end times for this table. + End times can be relative times(e.g. now.day - 1 month) or absolute times(e.g. 2014-02) + The min of the end times will be considered as the final end time. This storagetable will not be + candidate for answering time ranges completely after its end time. </xs:documentation> </xs:annotation> </xs:element> http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java index 2796cd9..e5cf468 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java @@ -150,6 +150,12 @@ public class MetastoreUtil { public static String getCubeColEndTimePropertyKey(String colName) { return getColumnKeyPrefix(colName) + END_TIME_SFX; } + public static String getStoragetableStartTimesKey(){ + return STORAGE_PFX + "start.times"; + } + public static String getStoragetableEndTimesKey(){ + return STORAGE_PFX + "end.times"; + } public static String getCubeColCostPropertyKey(String colName) { return getColumnKeyPrefix(colName) + COST_SFX; http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java index 9ea43bb..9c8b5b9 100644 --- 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java @@ -180,6 +180,8 @@ public class CandidateTablePruneCause { NO_PARTITIONS, // partition column does not exist PART_COL_DOES_NOT_EXIST, + // Range is not supported by this storage table + RANGE_NOT_ANSWERABLE, // storage is not supported by execution engine UNSUPPORTED } http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java index 67932da..4690d1d 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java @@ -63,7 +63,7 @@ public final class DateUtil { } public static final String GRANULARITY = "\\.(" + UNIT + ")"; - public static final String RELATIVE = "(now){1}(" + GRANULARITY + "){0,1}"; + public static final String RELATIVE = "(now)(" + GRANULARITY + ")?"; public static final Pattern P_RELATIVE = Pattern.compile(RELATIVE, Pattern.CASE_INSENSITIVE); public static final String WSPACE = "\\s+"; @@ -79,7 +79,7 @@ public final class DateUtil { public static final Pattern P_UNIT = Pattern.compile(UNIT, Pattern.CASE_INSENSITIVE); public static final String RELDATE_VALIDATOR_STR = RELATIVE + OPTIONAL_WSPACE + "((" + SIGNAGE + ")" + "(" - + WSPACE + ")?" + "(" + QUANTITY + ")" + OPTIONAL_WSPACE + "(" + UNIT + ")){0,1}" + "(s?)"; + + WSPACE + ")?" + "(" + QUANTITY + ")" + OPTIONAL_WSPACE + "(" + UNIT + "))?" 
+ "(s?)"; public static final Pattern RELDATE_VALIDATOR = Pattern.compile(RELDATE_VALIDATOR_STR, Pattern.CASE_INSENSITIVE); http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java index f67fc26..4db1626 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java @@ -19,9 +19,12 @@ package org.apache.lens.cube.parse; import static org.apache.lens.cube.metadata.MetastoreUtil.getFactOrDimtableStorageTableName; +import static org.apache.lens.cube.metadata.MetastoreUtil.getStoragetableEndTimesKey; +import static org.apache.lens.cube.metadata.MetastoreUtil.getStoragetableStartTimesKey; import static org.apache.lens.cube.parse.CandidateTablePruneCause.*; import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.*; import static org.apache.lens.cube.parse.CandidateTablePruneCause.SkipStorageCode.PART_COL_DOES_NOT_EXIST; +import static org.apache.lens.cube.parse.CandidateTablePruneCause.SkipStorageCode.RANGE_NOT_ANSWERABLE; import static org.apache.lens.cube.parse.DateUtil.WSPACE; import static org.apache.lens.cube.parse.StorageUtil.joinWithAnd; @@ -69,6 +72,7 @@ class StorageTableResolver implements ContextRewriter { private TimeRangeWriter rangeWriter; private DateFormat partWhereClauseFormat = null; private PHASE phase; + private HashMap<CubeFactTable, Map<String, SkipStorageCause>> skipStorageCausesPerFact; enum PHASE { FACT_TABLES, FACT_PARTITIONS, DIM_TABLE_AND_PARTITIONS; @@ -179,7 +183,7 @@ class StorageTableResolver implements ContextRewriter { Set<String> storageTables = new HashSet<String>(); Map<String, 
String> whereClauses = new HashMap<String, String>(); boolean foundPart = false; - Map<String, SkipStorageCause> skipStorageCauses = new HashMap<String, SkipStorageCause>(); + Map<String, SkipStorageCause> skipStorageCauses = new HashMap<>(); for (String storage : dimtable.getStorages()) { if (isStorageSupported(storage)) { String tableName = getFactOrDimtableStorageTableName(dimtable.getName(), storage).toLowerCase(); @@ -235,6 +239,7 @@ class StorageTableResolver implements ContextRewriter { // Resolves all the storage table names, which are valid for each updatePeriod private void resolveFactStorageTableNames(CubeQueryContext cubeql) throws LensException { Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator(); + skipStorageCausesPerFact = new HashMap<>(); while (i.hasNext()) { CubeFactTable fact = i.next().fact; if (fact.getUpdatePeriods().isEmpty()) { @@ -247,7 +252,7 @@ class StorageTableResolver implements ContextRewriter { String str = conf.get(CubeQueryConfUtil.getValidStorageTablesKey(fact.getName())); List<String> validFactStorageTables = StringUtils.isBlank(str) ? 
null : Arrays.asList(StringUtils.split(str.toLowerCase(), ",")); - Map<String, SkipStorageCause> skipStorageCauses = new HashMap<String, SkipStorageCause>(); + Map<String, SkipStorageCause> skipStorageCauses = new HashMap<>(); for (Map.Entry<String, Set<UpdatePeriod>> entry : fact.getUpdatePeriods().entrySet()) { String storage = entry.getKey(); @@ -281,7 +286,7 @@ class StorageTableResolver implements ContextRewriter { } Set<String> storageTables = storageTableMap.get(updatePeriod); if (storageTables == null) { - storageTables = new LinkedHashSet<String>(); + storageTables = new LinkedHashSet<>(); storageTableMap.put(updatePeriod, storageTables); } isStorageAdded = true; @@ -292,6 +297,7 @@ class StorageTableResolver implements ContextRewriter { skipStorageCauses.put(storage, SkipStorageCause.noCandidateUpdatePeriod(skipUpdatePeriodCauses)); } } + skipStorageCausesPerFact.put(fact, skipStorageCauses); if (storageTableMap.isEmpty()) { log.info("Not considering fact table:{} as it does not have any storage tables", fact); cubeql.addFactPruningMsgs(fact, noCandidateStorages(skipStorageCauses)); @@ -359,7 +365,10 @@ class StorageTableResolver implements ContextRewriter { while (i.hasNext()) { CandidateFact cfact = i.next(); List<FactPartition> answeringParts = new ArrayList<>(); - HashMap<String, SkipStorageCause> skipStorageCauses = new HashMap<>(); + Map<String, SkipStorageCause> skipStorageCauses = skipStorageCausesPerFact.get(cfact.fact); + if (skipStorageCauses == null) { + skipStorageCauses = new HashMap<>(); + } PartitionRangesForPartitionColumns missingParts = new PartitionRangesForPartitionColumns(); boolean noPartsForRange = false; Set<String> unsupportedTimeDims = Sets.newHashSet(); @@ -506,7 +515,7 @@ class StorageTableResolver implements ContextRewriter { } private Set<FactPartition> getPartitions(CubeFactTable fact, TimeRange range, - HashMap<String, SkipStorageCause> skipStorageCauses, + Map<String, SkipStorageCause> skipStorageCauses, 
PartitionRangesForPartitionColumns missingPartitions) throws LensException { try { return getPartitions(fact, range, getValidUpdatePeriods(fact), true, failOnPartialData, skipStorageCauses, @@ -564,11 +573,13 @@ class StorageTableResolver implements ContextRewriter { Iterator<String> it = storageTbls.iterator(); while (it.hasNext()) { String storageTableName = it.next(); - if (!client.partColExists(storageTableName, partCol)) { + if (!isStorageTableCandidateForRange(storageTableName, fromDate, toDate)) { + skipStorageCauses.put(storageTableName, new SkipStorageCause(RANGE_NOT_ANSWERABLE)); + it.remove(); + } else if (!client.partColExists(storageTableName, partCol)) { log.info("{} does not exist in {}", partCol, storageTableName); skipStorageCauses.put(storageTableName, SkipStorageCause.partColDoesNotExist(partCol)); it.remove(); - continue; } } @@ -683,6 +694,32 @@ class StorageTableResolver implements ContextRewriter { updatePeriods, addNonExistingParts, failOnPartialData, skipStorageCauses, missingPartitions); } + private boolean isStorageTableCandidateForRange(String storageTableName, Date fromDate, Date toDate) throws + HiveException, LensException { + Date now = new Date(); + String startProperty = client.getTable(storageTableName).getProperty(getStoragetableStartTimesKey()); + if (StringUtils.isNotBlank(startProperty)) { + for (String timeStr : startProperty.split("\\s*,\\s*")) { + if (toDate.before(DateUtil.resolveDate(timeStr, now))) { + log.info("from date {} is before validity start time: {}, hence discarding {}", + toDate, timeStr, storageTableName); + return false; + } + } + } + String endProperty = client.getTable(storageTableName).getProperty(getStoragetableEndTimesKey()); + if (StringUtils.isNotBlank(endProperty)) { + for (String timeStr : endProperty.split("\\s*,\\s*")) { + if (fromDate.after(DateUtil.resolveDate(timeStr, now))) { + log.info("to date {} is after validity end time: {}, hence discarding {}", + fromDate, timeStr, storageTableName); + 
return false; + } + } + } + return true; + } + private void updateFactPartitionStorageTablesFrom(CubeFactTable fact, FactPartition part, Set<String> storageTableNames) throws LensException, HiveException, ParseException { for (String storageTableName : storageTableNames) { http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java index dc11b4c..826f6b6 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java @@ -1309,8 +1309,8 @@ public class CubeTestSetup { updates.add(QUARTERLY); updates.add(YEARLY); - ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); - List<String> timePartCols = new ArrayList<String>(); + ArrayList<FieldSchema> partCols = new ArrayList<>(); + List<String> timePartCols = new ArrayList<>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); @@ -1319,20 +1319,25 @@ public class CubeTestSetup { s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); + s1.setTblProps(new HashMap<String, String>()); + s1.getTblProps().put(MetastoreUtil.getStoragetableStartTimesKey(), "2000, now - 10 years"); + s1.getTblProps().put(MetastoreUtil.getStoragetableEndTimesKey(), "now - 5 years, 2010"); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); - ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>(); + ArrayList<FieldSchema> s2PartCols = new ArrayList<>(); 
s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2.setPartCols(s2PartCols); s2.setTimePartCols(Arrays.asList("ttd", "ttd2")); storageAggregatePeriods.put(c99, updates); + storageAggregatePeriods.put(c0, updates); - Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); + Map<String, StorageTableDesc> storageTables = new HashMap<>(); storageTables.put(c99, s2); + storageTables.put(c0, s1); // create cube fact client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, factValidityProperties, storageTables); http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java index d7484d8..4acd063 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java @@ -463,7 +463,7 @@ public class TestCubeRewriter extends TestQueryRewrite { @Test public void testCubeWhereQueryWithMultipleTablesForMonth() throws Exception { Configuration conf = getConf(); - conf.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, ""); + conf.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, "C0,C1,C2,C3,C4,C5"); conf.set(CubeQueryConfUtil.getValidStorageTablesKey("testfact"), ""); conf.set(CubeQueryConfUtil.getValidUpdatePeriodsKey("testfact", "C1"), "HOURLY"); conf.set(CubeQueryConfUtil.getValidUpdatePeriodsKey("testfact2", "C1"), "YEARLY"); @@ -990,6 +990,9 @@ public class TestCubeRewriter extends TestQueryRewrite { MISSING_PARTITIONS); 
assertEquals(pruneCauses.getDetails().get("cheapfact").iterator().next().getCause(), NO_CANDIDATE_STORAGES); + CandidateTablePruneCause cheapFactPruneCauses = pruneCauses.getDetails().get("cheapfact").iterator().next(); + assertEquals(cheapFactPruneCauses.getStorageCauses().get("c0").getCause(), SkipStorageCode.RANGE_NOT_ANSWERABLE); + assertEquals(cheapFactPruneCauses.getStorageCauses().get("c99").getCause(), SkipStorageCode.UNSUPPORTED); assertEquals(pruneCauses.getDetails().get("summary4").iterator().next().getCause(), TIMEDIM_NOT_SUPPORTED); assertTrue(pruneCauses.getDetails().get("summary4").iterator().next().getUnsupportedTimeDims().contains("d_time")); }
