LENS-719: Allow fact start time to be specified for a storage and an update 
period


Project: http://git-wip-us.apache.org/repos/asf/lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/3ed191ac
Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/3ed191ac
Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/3ed191ac

Branch: refs/heads/LENS-581
Commit: 3ed191aca6c18be7c53afefdea28a6d08ae1fd07
Parents: 10dcebb
Author: Rajat Khandelwal <[email protected]>
Authored: Mon Nov 16 11:41:05 2015 +0530
Committer: Rajat Khandelwal <[email protected]>
Committed: Mon Nov 16 11:41:05 2015 +0530

----------------------------------------------------------------------
 lens-api/src/main/resources/cube-0.1.xsd        |  9 ++++
 .../lens/cube/metadata/MetastoreUtil.java       |  6 +++
 .../cube/parse/CandidateTablePruneCause.java    |  2 +
 .../org/apache/lens/cube/parse/DateUtil.java    |  4 +-
 .../lens/cube/parse/StorageTableResolver.java   | 51 +++++++++++++++++---
 .../apache/lens/cube/parse/CubeTestSetup.java   | 13 +++--
 .../lens/cube/parse/TestCubeRewriter.java       |  5 +-
 7 files changed, 76 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-api/src/main/resources/cube-0.1.xsd
----------------------------------------------------------------------
diff --git a/lens-api/src/main/resources/cube-0.1.xsd 
b/lens-api/src/main/resources/cube-0.1.xsd
index 5d7630d..4092133 100644
--- a/lens-api/src/main/resources/cube-0.1.xsd
+++ b/lens-api/src/main/resources/cube-0.1.xsd
@@ -828,6 +828,15 @@
             The following properties can be specified for Elastic search 
tables :
             1. lens.metastore.es.index.name : The underlying ES index name.
             2. lens.metastore.es.type.name : The underlying ES type name.
+            Start and End times for storage table:
+            1. cube.storagetable.start.times: Comma separated list of start 
times for this table.
+               Start times can be relative times (e.g. now.day - 1 month) or 
absolute times (e.g. 2014-02).
+               The max of the start times will be considered as the final 
start time. This storagetable will
+               not be a candidate for answering time ranges completely before 
its start time.
+            2. cube.storagetable.end.times: Comma separated list of end times 
for this table.
+               End times can be relative times (e.g. now.day - 1 month) or 
absolute times (e.g. 2014-02).
+               The min of the end times will be considered as the final end 
time. This storagetable will not be
+               a candidate for answering time ranges completely after its end 
time.
           </xs:documentation>
         </xs:annotation>
       </xs:element>

http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java
----------------------------------------------------------------------
diff --git 
a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java 
b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java
index 2796cd9..e5cf468 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java
@@ -150,6 +150,12 @@ public class MetastoreUtil {
   public static String getCubeColEndTimePropertyKey(String colName) {
     return getColumnKeyPrefix(colName) + END_TIME_SFX;
   }
+  public static String getStoragetableStartTimesKey(){ // table-property key for the comma separated start times
+    return STORAGE_PFX + "start.times";
+  }
+  public static String getStoragetableEndTimesKey(){ // table-property key for the comma separated end times
+    return STORAGE_PFX + "end.times";
+  }
 
   public static String getCubeColCostPropertyKey(String colName) {
     return getColumnKeyPrefix(colName) + COST_SFX;

http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java
----------------------------------------------------------------------
diff --git 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java
 
b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java
index 9ea43bb..9c8b5b9 100644
--- 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java
+++ 
b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java
@@ -180,6 +180,8 @@ public class CandidateTablePruneCause {
     NO_PARTITIONS,
     // partition column does not exist
     PART_COL_DOES_NOT_EXIST,
+    // Range is not supported by this storage table
+    RANGE_NOT_ANSWERABLE,
     // storage is not supported by execution engine
     UNSUPPORTED
   }

http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java 
b/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java
index 67932da..4690d1d 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/DateUtil.java
@@ -63,7 +63,7 @@ public final class DateUtil {
   }
 
   public static final String GRANULARITY = "\\.(" + UNIT + ")";
-  public static final String RELATIVE = "(now){1}(" + GRANULARITY + "){0,1}";
+  public static final String RELATIVE = "(now)(" + GRANULARITY + ")?";
   public static final Pattern P_RELATIVE = Pattern.compile(RELATIVE, 
Pattern.CASE_INSENSITIVE);
 
   public static final String WSPACE = "\\s+";
@@ -79,7 +79,7 @@ public final class DateUtil {
   public static final Pattern P_UNIT = Pattern.compile(UNIT, 
Pattern.CASE_INSENSITIVE);
 
   public static final String RELDATE_VALIDATOR_STR = RELATIVE + 
OPTIONAL_WSPACE + "((" + SIGNAGE + ")" + "("
-    + WSPACE + ")?" + "(" + QUANTITY + ")" + OPTIONAL_WSPACE + "(" + UNIT + 
")){0,1}" + "(s?)";
+    + WSPACE + ")?" + "(" + QUANTITY + ")" + OPTIONAL_WSPACE + "(" + UNIT + 
"))?" + "(s?)";
 
   public static final Pattern RELDATE_VALIDATOR = 
Pattern.compile(RELDATE_VALIDATOR_STR, Pattern.CASE_INSENSITIVE);
 

http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
----------------------------------------------------------------------
diff --git 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java 
b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
index f67fc26..4db1626 100644
--- 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
+++ 
b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
@@ -19,9 +19,12 @@
 package org.apache.lens.cube.parse;
 
 import static 
org.apache.lens.cube.metadata.MetastoreUtil.getFactOrDimtableStorageTableName;
+import static 
org.apache.lens.cube.metadata.MetastoreUtil.getStoragetableEndTimesKey;
+import static 
org.apache.lens.cube.metadata.MetastoreUtil.getStoragetableStartTimesKey;
 import static org.apache.lens.cube.parse.CandidateTablePruneCause.*;
 import static 
org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.*;
 import static 
org.apache.lens.cube.parse.CandidateTablePruneCause.SkipStorageCode.PART_COL_DOES_NOT_EXIST;
+import static 
org.apache.lens.cube.parse.CandidateTablePruneCause.SkipStorageCode.RANGE_NOT_ANSWERABLE;
 import static org.apache.lens.cube.parse.DateUtil.WSPACE;
 import static org.apache.lens.cube.parse.StorageUtil.joinWithAnd;
 
@@ -69,6 +72,7 @@ class StorageTableResolver implements ContextRewriter {
   private TimeRangeWriter rangeWriter;
   private DateFormat partWhereClauseFormat = null;
   private PHASE phase;
+  private HashMap<CubeFactTable, Map<String, SkipStorageCause>> 
skipStorageCausesPerFact;
 
   enum PHASE {
     FACT_TABLES, FACT_PARTITIONS, DIM_TABLE_AND_PARTITIONS;
@@ -179,7 +183,7 @@ class StorageTableResolver implements ContextRewriter {
         Set<String> storageTables = new HashSet<String>();
         Map<String, String> whereClauses = new HashMap<String, String>();
         boolean foundPart = false;
-        Map<String, SkipStorageCause> skipStorageCauses = new HashMap<String, 
SkipStorageCause>();
+        Map<String, SkipStorageCause> skipStorageCauses = new HashMap<>();
         for (String storage : dimtable.getStorages()) {
           if (isStorageSupported(storage)) {
             String tableName = 
getFactOrDimtableStorageTableName(dimtable.getName(), storage).toLowerCase();
@@ -235,6 +239,7 @@ class StorageTableResolver implements ContextRewriter {
   // Resolves all the storage table names, which are valid for each 
updatePeriod
   private void resolveFactStorageTableNames(CubeQueryContext cubeql) throws 
LensException {
     Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator();
+    skipStorageCausesPerFact = new HashMap<>();
     while (i.hasNext()) {
       CubeFactTable fact = i.next().fact;
       if (fact.getUpdatePeriods().isEmpty()) {
@@ -247,7 +252,7 @@ class StorageTableResolver implements ContextRewriter {
       String str = 
conf.get(CubeQueryConfUtil.getValidStorageTablesKey(fact.getName()));
       List<String> validFactStorageTables =
         StringUtils.isBlank(str) ? null : 
Arrays.asList(StringUtils.split(str.toLowerCase(), ","));
-      Map<String, SkipStorageCause> skipStorageCauses = new HashMap<String, 
SkipStorageCause>();
+      Map<String, SkipStorageCause> skipStorageCauses = new HashMap<>();
 
       for (Map.Entry<String, Set<UpdatePeriod>> entry : 
fact.getUpdatePeriods().entrySet()) {
         String storage = entry.getKey();
@@ -281,7 +286,7 @@ class StorageTableResolver implements ContextRewriter {
           }
           Set<String> storageTables = storageTableMap.get(updatePeriod);
           if (storageTables == null) {
-            storageTables = new LinkedHashSet<String>();
+            storageTables = new LinkedHashSet<>();
             storageTableMap.put(updatePeriod, storageTables);
           }
           isStorageAdded = true;
@@ -292,6 +297,7 @@ class StorageTableResolver implements ContextRewriter {
           skipStorageCauses.put(storage, 
SkipStorageCause.noCandidateUpdatePeriod(skipUpdatePeriodCauses));
         }
       }
+      skipStorageCausesPerFact.put(fact, skipStorageCauses);
       if (storageTableMap.isEmpty()) {
         log.info("Not considering fact table:{} as it does not have any 
storage tables", fact);
         cubeql.addFactPruningMsgs(fact, 
noCandidateStorages(skipStorageCauses));
@@ -359,7 +365,10 @@ class StorageTableResolver implements ContextRewriter {
     while (i.hasNext()) {
       CandidateFact cfact = i.next();
       List<FactPartition> answeringParts = new ArrayList<>();
-      HashMap<String, SkipStorageCause> skipStorageCauses = new HashMap<>();
+      Map<String, SkipStorageCause> skipStorageCauses = 
skipStorageCausesPerFact.get(cfact.fact);
+      if (skipStorageCauses == null) {
+        skipStorageCauses = new HashMap<>();
+      }
       PartitionRangesForPartitionColumns missingParts = new 
PartitionRangesForPartitionColumns();
       boolean noPartsForRange = false;
       Set<String> unsupportedTimeDims = Sets.newHashSet();
@@ -506,7 +515,7 @@ class StorageTableResolver implements ContextRewriter {
   }
 
   private Set<FactPartition> getPartitions(CubeFactTable fact, TimeRange range,
-    HashMap<String, SkipStorageCause> skipStorageCauses,
+    Map<String, SkipStorageCause> skipStorageCauses,
     PartitionRangesForPartitionColumns missingPartitions) throws LensException 
{
     try {
       return getPartitions(fact, range, getValidUpdatePeriods(fact), true, 
failOnPartialData, skipStorageCauses,
@@ -564,11 +573,13 @@ class StorageTableResolver implements ContextRewriter {
     Iterator<String> it = storageTbls.iterator();
     while (it.hasNext()) {
       String storageTableName = it.next();
-      if (!client.partColExists(storageTableName, partCol)) {
+      if (!isStorageTableCandidateForRange(storageTableName, fromDate, 
toDate)) {
+        skipStorageCauses.put(storageTableName, new 
SkipStorageCause(RANGE_NOT_ANSWERABLE));
+        it.remove();
+      } else if (!client.partColExists(storageTableName, partCol)) {
         log.info("{} does not exist in {}", partCol, storageTableName);
         skipStorageCauses.put(storageTableName, 
SkipStorageCause.partColDoesNotExist(partCol));
         it.remove();
-        continue;
       }
     }
 
@@ -683,6 +694,32 @@ class StorageTableResolver implements ContextRewriter {
         updatePeriods, addNonExistingParts, failOnPartialData, 
skipStorageCauses, missingPartitions);
   }
 
+  private boolean isStorageTableCandidateForRange(String storageTableName, 
Date fromDate, Date toDate) throws
+    HiveException, LensException { // false when [fromDate, toDate] lies wholly outside the table's start/end times
+    Date now = new Date(); // one shared reference instant passed to every DateUtil.resolveDate call below
+    String startProperty = 
client.getTable(storageTableName).getProperty(getStoragetableStartTimesKey());
+    if (StringUtils.isNotBlank(startProperty)) { // a blank or missing property imposes no lower bound
+      for (String timeStr : startProperty.split("\\s*,\\s*")) {
+        if (toDate.before(DateUtil.resolveDate(timeStr, now))) { // range ends before this start time
+          log.info("to date {} is before validity start time: {}, hence 
discarding {}",
+            toDate, timeStr, storageTableName);
+          return false;
+        }
+      }
+    }
+    String endProperty = 
client.getTable(storageTableName).getProperty(getStoragetableEndTimesKey());
+    if (StringUtils.isNotBlank(endProperty)) { // a blank or missing property imposes no upper bound
+      for (String timeStr : endProperty.split("\\s*,\\s*")) {
+        if (fromDate.after(DateUtil.resolveDate(timeStr, now))) { // range begins after this end time
+          log.info("from date {} is after validity end time: {}, hence 
discarding {}",
+            fromDate, timeStr, storageTableName);
+          return false;
+        }
+      }
+    }
+    return true; // no configured start/end time excludes the range
+  }
+
   private void updateFactPartitionStorageTablesFrom(CubeFactTable fact,
     FactPartition part, Set<String> storageTableNames) throws LensException, 
HiveException, ParseException {
     for (String storageTableName : storageTableNames) {

http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
----------------------------------------------------------------------
diff --git 
a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java 
b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
index dc11b4c..826f6b6 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
@@ -1309,8 +1309,8 @@ public class CubeTestSetup {
     updates.add(QUARTERLY);
     updates.add(YEARLY);
 
-    ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
-    List<String> timePartCols = new ArrayList<String>();
+    ArrayList<FieldSchema> partCols = new ArrayList<>();
+    List<String> timePartCols = new ArrayList<>();
     partCols.add(TestCubeMetastoreClient.getDatePartition());
     timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
 
@@ -1319,20 +1319,25 @@ public class CubeTestSetup {
     s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
     s1.setPartCols(partCols);
     s1.setTimePartCols(timePartCols);
+    s1.setTblProps(new HashMap<String, String>());
+    s1.getTblProps().put(MetastoreUtil.getStoragetableStartTimesKey(), "2000, 
now - 10 years");
+    s1.getTblProps().put(MetastoreUtil.getStoragetableEndTimesKey(), "now - 5 
years, 2010");
 
     StorageTableDesc s2 = new StorageTableDesc();
     s2.setInputFormat(TextInputFormat.class.getCanonicalName());
     s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
-    ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>();
+    ArrayList<FieldSchema> s2PartCols = new ArrayList<>();
     s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, 
"test date partition"));
     s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, 
"test date partition"));
     s2.setPartCols(s2PartCols);
     s2.setTimePartCols(Arrays.asList("ttd", "ttd2"));
 
     storageAggregatePeriods.put(c99, updates);
+    storageAggregatePeriods.put(c0, updates);
 
-    Map<String, StorageTableDesc> storageTables = new HashMap<String, 
StorageTableDesc>();
+    Map<String, StorageTableDesc> storageTables = new HashMap<>();
     storageTables.put(c99, s2);
+    storageTables.put(c0, s1);
     // create cube fact
     client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, 
storageAggregatePeriods, 0L,
       factValidityProperties, storageTables);

http://git-wip-us.apache.org/repos/asf/lens/blob/3ed191ac/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
----------------------------------------------------------------------
diff --git 
a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java 
b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
index d7484d8..4acd063 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
@@ -463,7 +463,7 @@ public class TestCubeRewriter extends TestQueryRewrite {
   @Test
   public void testCubeWhereQueryWithMultipleTablesForMonth() throws Exception {
     Configuration conf = getConf();
-    conf.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, "");
+    conf.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, "C0,C1,C2,C3,C4,C5");
     conf.set(CubeQueryConfUtil.getValidStorageTablesKey("testfact"), "");
     conf.set(CubeQueryConfUtil.getValidUpdatePeriodsKey("testfact", "C1"), 
"HOURLY");
     conf.set(CubeQueryConfUtil.getValidUpdatePeriodsKey("testfact2", "C1"), 
"YEARLY");
@@ -990,6 +990,9 @@ public class TestCubeRewriter extends TestQueryRewrite {
       MISSING_PARTITIONS);
     
assertEquals(pruneCauses.getDetails().get("cheapfact").iterator().next().getCause(),
       NO_CANDIDATE_STORAGES);
+    CandidateTablePruneCause cheapFactPruneCauses = 
pruneCauses.getDetails().get("cheapfact").iterator().next();
+    assertEquals(cheapFactPruneCauses.getStorageCauses().get("c0").getCause(), 
SkipStorageCode.RANGE_NOT_ANSWERABLE);
+    
assertEquals(cheapFactPruneCauses.getStorageCauses().get("c99").getCause(), 
SkipStorageCode.UNSUPPORTED);
     
assertEquals(pruneCauses.getDetails().get("summary4").iterator().next().getCause(),
 TIMEDIM_NOT_SUPPORTED);
     
assertTrue(pruneCauses.getDetails().get("summary4").iterator().next().getUnsupportedTimeDims().contains("d_time"));
   }

Reply via email to