HIVE-18609: Results cache invalidation based on ACID table updates (Jason Dere, reviewed by GopalV)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7948def1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7948def1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7948def1 Branch: refs/heads/branch-3 Commit: 7948def19971df6b18870a4f388e30ce08d37deb Parents: 67a8442 Author: Jason Dere <jd...@hortonworks.com> Authored: Mon Apr 16 16:44:40 2018 -0700 Committer: Jason Dere <jd...@hortonworks.com> Committed: Mon Apr 16 16:44:40 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../test/resources/testconfiguration.properties | 2 + .../java/org/apache/hadoop/hive/ql/Driver.java | 7 +- .../ql/cache/results/QueryResultsCache.java | 65 +- .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 16 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 55 +- .../queries/clientpositive/results_cache_1.q | 1 + .../queries/clientpositive/results_cache_2.q | 1 + .../clientpositive/results_cache_capacity.q | 1 + .../clientpositive/results_cache_empty_result.q | 1 + .../clientpositive/results_cache_invalidation.q | 89 +++ .../clientpositive/results_cache_lifetime.q | 1 + .../results_cache_quoted_identifiers.q | 1 + .../clientpositive/results_cache_temptable.q | 1 + .../results_cache_transactional.q | 56 ++ .../clientpositive/results_cache_with_masking.q | 1 + .../llap/results_cache_invalidation.q.out | 793 +++++++++++++++++++ .../llap/results_cache_transactional.q.out | 624 +++++++++++++++ .../results_cache_invalidation.q.out | 748 +++++++++++++++++ .../results_cache_transactional.q.out | 583 ++++++++++++++ 20 files changed, 3041 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git 
a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e540d02..9eb76e7 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4266,6 +4266,10 @@ public class HiveConf extends Configuration { "If the query results cache is enabled. This will keep results of previously executed queries " + "to be reused if the same query is executed again."), + HIVE_QUERY_RESULTS_CACHE_NONTRANSACTIONAL_TABLES_ENABLED("hive.query.results.cache.nontransactional.tables.enabled", false, + "If the query results cache is enabled for queries involving non-transactional tables." + + "Users who enable this setting should be willing to tolerate some amount of stale results in the cache."), + HIVE_QUERY_RESULTS_CACHE_WAIT_FOR_PENDING_RESULTS("hive.query.results.cache.wait.for.pending.results", true, "Should a query wait for the pending results of an already running query, " + "in order to use the cached result when it becomes ready"), http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index c5d4e9f..27e5feb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -244,6 +244,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ ptf_streaming.q,\ results_cache_1.q,\ results_cache_empty_result.q,\ + results_cache_invalidation.q,\ + results_cache_transactional.q,\ sample1.q,\ selectDistinctStar.q,\ select_dummy_source.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/java/org/apache/hadoop/hive/ql/Driver.java ---------------------------------------------------------------------- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index a88453c..4acdd9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -1977,9 +1977,14 @@ public class Driver implements IDriver { PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE); + ValidTxnWriteIdList txnWriteIdList = null; + if (plan.hasAcidResourcesInQuery()) { + txnWriteIdList = AcidUtils.getValidTxnWriteIdList(conf); + } boolean savedToCache = QueryResultsCache.getInstance().setEntryValid( cacheUsage.getCacheEntry(), - plan.getFetchTask().getWork()); + plan.getFetchTask().getWork(), + txnWriteIdList); LOG.info("savedToCache: {}", savedToCache); if (savedToCache) { useFetchFromCache(cacheUsage.getCacheEntry()); http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java index b1a3646..90c8ec3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java @@ -43,6 +43,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Supplier; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileSystem; @@ -52,17 +53,21 @@ import org.apache.hadoop.hive.common.metrics.common.Metrics; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; import 
org.apache.hadoop.hive.common.metrics.common.MetricsVariable; +import org.apache.hadoop.hive.common.ValidTxnWriteIdList; +import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.Entity.Type; import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; import org.apache.hadoop.hive.ql.parse.TableAccessInfo; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hive.common.util.TxnIdUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,10 +81,12 @@ public final class QueryResultsCache { public static class LookupInfo { private String queryText; + private Supplier<ValidTxnWriteIdList> txnWriteIdListProvider; - public LookupInfo(String queryText) { + public LookupInfo(String queryText, Supplier<ValidTxnWriteIdList> txnWriteIdListProvider) { super(); this.queryText = queryText; + this.txnWriteIdListProvider = txnWriteIdListProvider; } public String getQueryText() { @@ -174,6 +181,7 @@ public final class QueryResultsCache { private AtomicInteger readers = new AtomicInteger(0); private ScheduledFuture<?> invalidationFuture = null; private volatile CacheEntryStatus status = CacheEntryStatus.PENDING; + private ValidTxnWriteIdList txnWriteIdList; public void releaseReader() { int readerCount = 0; @@ -389,15 +397,20 @@ public final class QueryResultsCache { LOG.debug("QueryResultsCache lookup for query: {}", request.queryText); - boolean foundPending = false; + boolean foundPending = false; + // Cannot remove entries while we currently hold read lock, so keep track of them to delete later. 
+ Set<CacheEntry> entriesToRemove = new HashSet<CacheEntry>(); Lock readLock = rwLock.readLock(); try { + // Note: ReentrantReadWriteLock does not allow upgrading a read lock to a write lock. + // Care must be taken while under read lock, to make sure we do not perform any actions + // which attempt to take a write lock. readLock.lock(); Set<CacheEntry> candidates = queryMap.get(request.queryText); if (candidates != null) { CacheEntry pendingResult = null; for (CacheEntry candidate : candidates) { - if (entryMatches(request, candidate)) { + if (entryMatches(request, candidate, entriesToRemove)) { CacheEntryStatus entryStatus = candidate.status; if (entryStatus == CacheEntryStatus.VALID) { result = candidate; @@ -422,6 +435,11 @@ public final class QueryResultsCache { readLock.unlock(); } + // Now that we have exited read lock it is safe to remove any invalid entries. + for (CacheEntry invalidEntry : entriesToRemove) { + removeEntry(invalidEntry); + } + LOG.debug("QueryResultsCache lookup result: {}", result); incrementMetric(MetricsConstant.QC_LOOKUPS); if (result != null) { @@ -477,7 +495,7 @@ public final class QueryResultsCache { * @param fetchWork * @return */ - public boolean setEntryValid(CacheEntry cacheEntry, FetchWork fetchWork) { + public boolean setEntryValid(CacheEntry cacheEntry, FetchWork fetchWork, ValidTxnWriteIdList txnWriteIdList) { String queryText = cacheEntry.getQueryText(); boolean dataDirMoved = false; Path queryResultsPath = null; @@ -527,6 +545,7 @@ public final class QueryResultsCache { cacheEntry.size = resultSize; this.cacheSize += resultSize; cacheEntry.createTime = System.currentTimeMillis(); + cacheEntry.txnWriteIdList = txnWriteIdList; cacheEntry.setStatus(CacheEntryStatus.VALID); // Mark this entry as being in use. Caller will need to release later. 
@@ -601,7 +620,15 @@ public final class QueryResultsCache { private static final float LRU_LOAD_FACTOR = 0.75f; private static final CacheEntry[] EMPTY_CACHEENTRY_ARRAY = {}; - private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry) { + /** + * Check that the cache entry matches the lookupInfo. + * @param lookupInfo + * @param entry + * @param entriesToRemove Set of entries to be removed after exiting read lock section. + * If the entry is found to be invalid it will be added to this set. + * @return + */ + private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry, Set<CacheEntry> entriesToRemove) { QueryInfo queryInfo = entry.getQueryInfo(); for (ReadEntity readEntity : queryInfo.getInputs()) { // Check that the tables used do not resolve to temp tables. @@ -614,6 +641,34 @@ public final class QueryResultsCache { tableUsed.getTableName()); return false; } + + // Has the table changed since the query was cached? + // For transactional tables, can compare the table writeIDs of the current/cached query. 
+ if (AcidUtils.isTransactionalTable(tableUsed)) { + boolean writeIdCheckPassed = false; + String tableName = tableUsed.getFullyQualifiedName(); + ValidTxnWriteIdList currentTxnWriteIdList = lookupInfo.txnWriteIdListProvider.get(); + ValidWriteIdList currentWriteIdForTable = + currentTxnWriteIdList.getTableValidWriteIdList(tableName); + ValidWriteIdList cachedWriteIdForTable = entry.txnWriteIdList.getTableValidWriteIdList(tableName); + + LOG.debug("Checking writeIds for table {}: currentWriteIdForTable {}, cachedWriteIdForTable {}", + tableName, currentWriteIdForTable, cachedWriteIdForTable); + if (currentWriteIdForTable != null && cachedWriteIdForTable != null) { + if (TxnIdUtils.checkEquivalentWriteIds(currentWriteIdForTable, cachedWriteIdForTable)) { + writeIdCheckPassed = true; + } + } + + if (!writeIdCheckPassed) { + LOG.debug("Cached query no longer valid due to table {}", tableUsed.getFullyQualifiedName()); + // We can invalidate the entry now, but calling removeEntry() requires a write lock + // and we may already have read lock taken now. Add to entriesToRemove to delete later. 
+ entriesToRemove.add(entry); + entry.invalidate(); + return false; + } + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 44a7496..2b1960c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -27,6 +27,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.HiveStatsUtils; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; @@ -41,6 +43,8 @@ import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater; import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.ql.io.orc.Writer; +import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; +import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -1508,11 +1512,19 @@ public class AcidUtils { } /** - * Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList. + * Get the ValidTxnWriteIdList saved in the configuration. 
*/ - public static ValidWriteIdList getTableValidWriteIdList(Configuration conf, String fullTableName) { + public static ValidTxnWriteIdList getValidTxnWriteIdList(Configuration conf) { String txnString = conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY); ValidTxnWriteIdList validTxnList = new ValidTxnWriteIdList(txnString); + return validTxnList; + } + + /** + * Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList. + */ + public static ValidWriteIdList getTableValidWriteIdList(Configuration conf, String fullTableName) { + ValidTxnWriteIdList validTxnList = getValidTxnWriteIdList(conf); return validTxnList.getTableValidWriteIdList(fullTableName); } http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 27efece..6df22a2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -40,8 +40,10 @@ import java.util.Queue; import java.util.Set; import java.util.TreeSet; import java.util.UUID; +import java.util.function.Supplier; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import java.util.stream.Collectors; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; @@ -63,6 +65,8 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; import 
org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.conf.HiveConf; @@ -117,6 +121,7 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity.WriteType; @@ -14522,7 +14527,33 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { QueryResultsCache.LookupInfo lookupInfo = null; String queryString = getQueryStringForCache(astNode); if (queryString != null) { - lookupInfo = new QueryResultsCache.LookupInfo(queryString); + lookupInfo = new QueryResultsCache.LookupInfo(queryString, + new Supplier<ValidTxnWriteIdList>() { + ValidTxnWriteIdList cachedWriteIdList = null; + @Override + public ValidTxnWriteIdList get() { + if (cachedWriteIdList == null) { + // TODO: Once HIVE-18948 is in, should be able to retrieve writeIdList from the conf. 
+ //cachedWriteIdList = AcidUtils.getValidTxnWriteIdList(conf); + // + List<String> transactionalTables = tablesFromReadEntities(inputs) + .stream() + .filter(table -> AcidUtils.isTransactionalTable(table)) + .map(table -> table.getFullyQualifiedName()) + .collect(Collectors.toList()); + try { + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + cachedWriteIdList = + getTxnMgr().getValidWriteIds(transactionalTables, txnString); + } catch (Exception err) { + String msg = "Error while getting the txnWriteIdList for tables " + transactionalTables + + " and validTxnList " + conf.get(ValidTxnList.VALID_TXNS_KEY); + throw new RuntimeException(msg, err); + } + } + return cachedWriteIdList; + } + }); } return lookupInfo; } @@ -14620,9 +14651,31 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { return false; } + if (!conf.getBoolVar(ConfVars.HIVE_QUERY_RESULTS_CACHE_NONTRANSACTIONAL_TABLES_ENABLED)) { + List<Table> nonTransactionalTables = getNonTransactionalTables(); + if (nonTransactionalTables.size() > 0) { + LOG.info("Not eligible for results caching - query contains non-transactional tables {}", + nonTransactionalTables); + return false; + } + } return true; } + private static Set<Table> tablesFromReadEntities(Set<ReadEntity> readEntities) { + return readEntities.stream() + .filter(entity -> entity.getType() == Entity.Type.TABLE) + .map(entity -> entity.getTable()) + .collect(Collectors.toSet()); + } + + private List<Table> getNonTransactionalTables() { + return tablesFromReadEntities(inputs) + .stream() + .filter(table -> !AcidUtils.isTransactionalTable(table)) + .collect(Collectors.toList()); + } + /** * Check the query results cache to see if the query represented by the lookupInfo can be * answered using the results cache. 
If the cache contains a suitable entry, the semantic analyzer http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_1.q b/ql/src/test/queries/clientpositive/results_cache_1.q index 4aea60e..0c85c4a 100644 --- a/ql/src/test/queries/clientpositive/results_cache_1.q +++ b/ql/src/test/queries/clientpositive/results_cache_1.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; explain select count(*) from src a join src b on (a.key = b.key); http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_2.q b/ql/src/test/queries/clientpositive/results_cache_2.q index 96a9092..034ec18 100644 --- a/ql/src/test/queries/clientpositive/results_cache_2.q +++ b/ql/src/test/queries/clientpositive/results_cache_2.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; set hive.fetch.task.conversion=more; -- Test 1: fetch task http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_capacity.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_capacity.q b/ql/src/test/queries/clientpositive/results_cache_capacity.q index 9f54577..eeb11e1 100644 --- a/ql/src/test/queries/clientpositive/results_cache_capacity.q +++ b/ql/src/test/queries/clientpositive/results_cache_capacity.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; -- Allow results cache to hold entries up 
to 125 bytes -- The single row queries are small enough to fit in the cache (103 bytes) http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_empty_result.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_empty_result.q b/ql/src/test/queries/clientpositive/results_cache_empty_result.q index 6213671..f5b99b5 100644 --- a/ql/src/test/queries/clientpositive/results_cache_empty_result.q +++ b/ql/src/test/queries/clientpositive/results_cache_empty_result.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; explain select count(*), key from src a where key < 0 group by key; http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_invalidation.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_invalidation.q b/ql/src/test/queries/clientpositive/results_cache_invalidation.q new file mode 100644 index 0000000..b69bdf2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/results_cache_invalidation.q @@ -0,0 +1,89 @@ + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true'); +create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true'); + +insert into tab1 select * from default.src; +insert into tab2 select * from default.src; + +set hive.query.results.cache.enabled=true; + +set test.comment="Run queries to load into cache"; +set test.comment; + +-- Q1 +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +-- Q2 +explain +select max(key) from tab2; +select max(key) from tab2; + +-- Q3 +explain +select 
count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +set test.comment="Q1 should now be able to use cache"; +set test.comment; +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +set test.comment="Q2 should now be able to use cache"; +set test.comment; +explain +select max(key) from tab2; +select max(key) from tab2; + +set test.comment="Q3 should now be able to use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +-- Update tab1 which should invalidate Q1 and Q3. +insert into tab1 values ('88', 'val_88'); + +set test.comment="Q1 should not use cache"; +set test.comment; +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +set test.comment="Q2 should still use cache since tab2 not updated"; +set test.comment; +explain +select max(key) from tab2; +select max(key) from tab2; + +set test.comment="Q3 should not use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +-- Update tab2 which should invalidate Q2 and Q3. 
+insert into tab2 values ('88', 'val_88'); + +set test.comment="Q1 should use cache"; +set test.comment; +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +set test.comment="Q2 should not use cache"; +set test.comment; +explain +select max(key) from tab2; +select max(key) from tab2; + +set test.comment="Q3 should not use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_lifetime.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_lifetime.q b/ql/src/test/queries/clientpositive/results_cache_lifetime.q index 60ffe96..1c306e1 100644 --- a/ql/src/test/queries/clientpositive/results_cache_lifetime.q +++ b/ql/src/test/queries/clientpositive/results_cache_lifetime.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; set hive.query.results.cache.max.entry.lifetime=2; -- This query used the cache from results_cache_1.q. Load it up. 
http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q index 4802f43..c5684f0 100644 --- a/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q +++ b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q @@ -9,6 +9,7 @@ create table quoted1 ( insert into quoted1 select key, key, value, value from src; set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; explain select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1; http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_temptable.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_temptable.q b/ql/src/test/queries/clientpositive/results_cache_temptable.q index 9e0de76..7e16702 100644 --- a/ql/src/test/queries/clientpositive/results_cache_temptable.q +++ b/ql/src/test/queries/clientpositive/results_cache_temptable.q @@ -1,4 +1,5 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; create table rct (key string, value string); load data local inpath '../../data/files/kv1.txt' overwrite into table rct; http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_transactional.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_transactional.q b/ql/src/test/queries/clientpositive/results_cache_transactional.q new file mode 100644 index 0000000..9181c6d --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/results_cache_transactional.q @@ -0,0 +1,56 @@ + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true'); +create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true'); + +insert into tab1 select * from default.src; +insert into tab2 select * from default.src; + +set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=false; + +explain +select max(key) from tab1; +select max(key) from tab1; + +set test.comment="Query on transactional table should use cache"; +set test.comment; +explain +select max(key) from tab1; +select max(key) from tab1; + +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +set test.comment="Join on transactional tables, should use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + + +-- Non-transactional tables + +explain +select max(key) from src; +select max(key) from src; + +set test.comment="Query on non-transactional table should not use cache"; +set test.comment; +explain +select max(key) from src; +select max(key) from src; + +explain +select count(*) from tab1 join src on (tab1.key = src.key); +select count(*) from tab1 join src on (tab1.key = src.key); + +set test.comment="Join uses non-transactional table, should not use cache"; +set test.comment; +explain +select count(*) from tab1 join src on (tab1.key = src.key); +select count(*) from tab1 join src on (tab1.key = src.key); + http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_with_masking.q ---------------------------------------------------------------------- 
diff --git a/ql/src/test/queries/clientpositive/results_cache_with_masking.q b/ql/src/test/queries/clientpositive/results_cache_with_masking.q index b4fcdd5..d353598 100644 --- a/ql/src/test/queries/clientpositive/results_cache_with_masking.q +++ b/ql/src/test/queries/clientpositive/results_cache_with_masking.q @@ -3,6 +3,7 @@ set hive.mapred.mode=nonstrict; set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; create table masking_test as select cast(key as int) as key, value from src; http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out b/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out new file mode 100644 index 0000000..c76de92 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out @@ -0,0 +1,793 @@ +PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab1 +POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab1 +PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab2 +POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: 
Output: database:default +POSTHOOK: Output: default@tab2 +PREHOOK: query: insert into tab1 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into tab2 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab2 +POSTHOOK: query: insert into tab2 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +test.comment="Run queries to load into cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) >= 0.0D) (type: boolean) + Statistics: Num rows: 30 Data size: 5338 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 30 Data size: 5338 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +500 +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: 
NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE 
+ Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 95 Data size: 17028 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + 
aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +test.comment="Q1 should now be able to use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +500 +test.comment="Q2 should now be able to use cache" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: 
select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +98 +test.comment="Q3 should now be able to use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +1028 +PREHOOK: query: insert into tab1 values ('88', 'val_88') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 values ('88', 'val_88') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SCRIPT [] +POSTHOOK: Lineage: tab1.value SCRIPT [] +test.comment="Q1 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
(UDFToDouble(key) >= 0.0D) (type: boolean) + Statistics: Num rows: 36 Data size: 6383 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 36 Data size: 6383 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +501 +test.comment="Q2 should still use cache since tab2 not updated" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select max(key) 
from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +98 +test.comment="Q3 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + 
sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 115 Data size: 20478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +PREHOOK: query: insert into tab2 values ('88', 'val_88') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: 
default@tab2 +POSTHOOK: query: insert into tab2 values ('88', 'val_88') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SCRIPT [] +POSTHOOK: Lineage: tab2.value SCRIPT [] +test.comment="Q1 should use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +501 +test.comment="Q2 should not use cache" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: may be 
used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +test.comment="Q3 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 115 Data size: 20478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1029