HIVE-18609: Results cache invalidation based on ACID table updates (Jason Dere, reviewed by GopalV)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7948def1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7948def1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7948def1 Branch: refs/heads/branch-3 Commit: 7948def19971df6b18870a4f388e30ce08d37deb Parents: 67a8442 Author: Jason Dere <jd...@hortonworks.com> Authored: Mon Apr 16 16:44:40 2018 -0700 Committer: Jason Dere <jd...@hortonworks.com> Committed: Mon Apr 16 16:44:40 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../test/resources/testconfiguration.properties | 2 + .../java/org/apache/hadoop/hive/ql/Driver.java | 7 +- .../ql/cache/results/QueryResultsCache.java | 65 +- .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 16 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 55 +- .../queries/clientpositive/results_cache_1.q | 1 + .../queries/clientpositive/results_cache_2.q | 1 + .../clientpositive/results_cache_capacity.q | 1 + .../clientpositive/results_cache_empty_result.q | 1 + .../clientpositive/results_cache_invalidation.q | 89 +++ .../clientpositive/results_cache_lifetime.q | 1 + .../results_cache_quoted_identifiers.q | 1 + .../clientpositive/results_cache_temptable.q | 1 + .../results_cache_transactional.q | 56 ++ .../clientpositive/results_cache_with_masking.q | 1 + .../llap/results_cache_invalidation.q.out | 793 +++++++++++++++++++ .../llap/results_cache_transactional.q.out | 624 +++++++++++++++ .../results_cache_invalidation.q.out | 748 +++++++++++++++++ .../results_cache_transactional.q.out | 583 ++++++++++++++ 20 files changed, 3041 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git 
a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e540d02..9eb76e7 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4266,6 +4266,10 @@ public class HiveConf extends Configuration { "If the query results cache is enabled. This will keep results of previously executed queries " + "to be reused if the same query is executed again."), + HIVE_QUERY_RESULTS_CACHE_NONTRANSACTIONAL_TABLES_ENABLED("hive.query.results.cache.nontransactional.tables.enabled", false, + "If the query results cache is enabled for queries involving non-transactional tables." + + "Users who enable this setting should be willing to tolerate some amount of stale results in the cache."), + HIVE_QUERY_RESULTS_CACHE_WAIT_FOR_PENDING_RESULTS("hive.query.results.cache.wait.for.pending.results", true, "Should a query wait for the pending results of an already running query, " + "in order to use the cached result when it becomes ready"), http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index c5d4e9f..27e5feb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -244,6 +244,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ ptf_streaming.q,\ results_cache_1.q,\ results_cache_empty_result.q,\ + results_cache_invalidation.q,\ + results_cache_transactional.q,\ sample1.q,\ selectDistinctStar.q,\ select_dummy_source.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/java/org/apache/hadoop/hive/ql/Driver.java ---------------------------------------------------------------------- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index a88453c..4acdd9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -1977,9 +1977,14 @@ public class Driver implements IDriver { PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE); + ValidTxnWriteIdList txnWriteIdList = null; + if (plan.hasAcidResourcesInQuery()) { + txnWriteIdList = AcidUtils.getValidTxnWriteIdList(conf); + } boolean savedToCache = QueryResultsCache.getInstance().setEntryValid( cacheUsage.getCacheEntry(), - plan.getFetchTask().getWork()); + plan.getFetchTask().getWork(), + txnWriteIdList); LOG.info("savedToCache: {}", savedToCache); if (savedToCache) { useFetchFromCache(cacheUsage.getCacheEntry()); http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java index b1a3646..90c8ec3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java @@ -43,6 +43,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Supplier; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileSystem; @@ -52,17 +53,21 @@ import org.apache.hadoop.hive.common.metrics.common.Metrics; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; import 
org.apache.hadoop.hive.common.metrics.common.MetricsVariable; +import org.apache.hadoop.hive.common.ValidTxnWriteIdList; +import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.Entity.Type; import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; import org.apache.hadoop.hive.ql.parse.TableAccessInfo; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hive.common.util.TxnIdUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,10 +81,12 @@ public final class QueryResultsCache { public static class LookupInfo { private String queryText; + private Supplier<ValidTxnWriteIdList> txnWriteIdListProvider; - public LookupInfo(String queryText) { + public LookupInfo(String queryText, Supplier<ValidTxnWriteIdList> txnWriteIdListProvider) { super(); this.queryText = queryText; + this.txnWriteIdListProvider = txnWriteIdListProvider; } public String getQueryText() { @@ -174,6 +181,7 @@ public final class QueryResultsCache { private AtomicInteger readers = new AtomicInteger(0); private ScheduledFuture<?> invalidationFuture = null; private volatile CacheEntryStatus status = CacheEntryStatus.PENDING; + private ValidTxnWriteIdList txnWriteIdList; public void releaseReader() { int readerCount = 0; @@ -389,15 +397,20 @@ public final class QueryResultsCache { LOG.debug("QueryResultsCache lookup for query: {}", request.queryText); - boolean foundPending = false; + boolean foundPending = false; + // Cannot remove entries while we currently hold read lock, so keep track of them to delete later. 
+ Set<CacheEntry> entriesToRemove = new HashSet<CacheEntry>(); Lock readLock = rwLock.readLock(); try { + // Note: ReentrantReadWriteLock does not allow upgrading a read lock to a write lock. + // Care must be taken while under read lock, to make sure we do not perform any actions + // which attempt to take a write lock. readLock.lock(); Set<CacheEntry> candidates = queryMap.get(request.queryText); if (candidates != null) { CacheEntry pendingResult = null; for (CacheEntry candidate : candidates) { - if (entryMatches(request, candidate)) { + if (entryMatches(request, candidate, entriesToRemove)) { CacheEntryStatus entryStatus = candidate.status; if (entryStatus == CacheEntryStatus.VALID) { result = candidate; @@ -422,6 +435,11 @@ public final class QueryResultsCache { readLock.unlock(); } + // Now that we have exited read lock it is safe to remove any invalid entries. + for (CacheEntry invalidEntry : entriesToRemove) { + removeEntry(invalidEntry); + } + LOG.debug("QueryResultsCache lookup result: {}", result); incrementMetric(MetricsConstant.QC_LOOKUPS); if (result != null) { @@ -477,7 +495,7 @@ public final class QueryResultsCache { * @param fetchWork * @return */ - public boolean setEntryValid(CacheEntry cacheEntry, FetchWork fetchWork) { + public boolean setEntryValid(CacheEntry cacheEntry, FetchWork fetchWork, ValidTxnWriteIdList txnWriteIdList) { String queryText = cacheEntry.getQueryText(); boolean dataDirMoved = false; Path queryResultsPath = null; @@ -527,6 +545,7 @@ public final class QueryResultsCache { cacheEntry.size = resultSize; this.cacheSize += resultSize; cacheEntry.createTime = System.currentTimeMillis(); + cacheEntry.txnWriteIdList = txnWriteIdList; cacheEntry.setStatus(CacheEntryStatus.VALID); // Mark this entry as being in use. Caller will need to release later. 
@@ -601,7 +620,15 @@ public final class QueryResultsCache { private static final float LRU_LOAD_FACTOR = 0.75f; private static final CacheEntry[] EMPTY_CACHEENTRY_ARRAY = {}; - private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry) { + /** + * Check that the cache entry matches the lookupInfo. + * @param lookupInfo + * @param entry + * @param entriesToRemove Set of entries to be removed after exiting read lock section. + * If the entry is found to be invalid it will be added to this set. + * @return + */ + private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry, Set<CacheEntry> entriesToRemove) { QueryInfo queryInfo = entry.getQueryInfo(); for (ReadEntity readEntity : queryInfo.getInputs()) { // Check that the tables used do not resolve to temp tables. @@ -614,6 +641,34 @@ public final class QueryResultsCache { tableUsed.getTableName()); return false; } + + // Has the table changed since the query was cached? + // For transactional tables, can compare the table writeIDs of the current/cached query. 
+ if (AcidUtils.isTransactionalTable(tableUsed)) { + boolean writeIdCheckPassed = false; + String tableName = tableUsed.getFullyQualifiedName(); + ValidTxnWriteIdList currentTxnWriteIdList = lookupInfo.txnWriteIdListProvider.get(); + ValidWriteIdList currentWriteIdForTable = + currentTxnWriteIdList.getTableValidWriteIdList(tableName); + ValidWriteIdList cachedWriteIdForTable = entry.txnWriteIdList.getTableValidWriteIdList(tableName); + + LOG.debug("Checking writeIds for table {}: currentWriteIdForTable {}, cachedWriteIdForTable {}", + tableName, currentWriteIdForTable, cachedWriteIdForTable); + if (currentWriteIdForTable != null && cachedWriteIdForTable != null) { + if (TxnIdUtils.checkEquivalentWriteIds(currentWriteIdForTable, cachedWriteIdForTable)) { + writeIdCheckPassed = true; + } + } + + if (!writeIdCheckPassed) { + LOG.debug("Cached query no longer valid due to table {}", tableUsed.getFullyQualifiedName()); + // We can invalidate the entry now, but calling removeEntry() requires a write lock + // and we may already have read lock taken now. Add to entriesToRemove to delete later. 
+ entriesToRemove.add(entry); + entry.invalidate(); + return false; + } + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 44a7496..2b1960c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -27,6 +27,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.HiveStatsUtils; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; @@ -41,6 +43,8 @@ import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater; import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.ql.io.orc.Writer; +import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; +import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -1508,11 +1512,19 @@ public class AcidUtils { } /** - * Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList. + * Get the ValidTxnWriteIdList saved in the configuration. 
*/ - public static ValidWriteIdList getTableValidWriteIdList(Configuration conf, String fullTableName) { + public static ValidTxnWriteIdList getValidTxnWriteIdList(Configuration conf) { String txnString = conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY); ValidTxnWriteIdList validTxnList = new ValidTxnWriteIdList(txnString); + return validTxnList; + } + + /** + * Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList. + */ + public static ValidWriteIdList getTableValidWriteIdList(Configuration conf, String fullTableName) { + ValidTxnWriteIdList validTxnList = getValidTxnWriteIdList(conf); return validTxnList.getTableValidWriteIdList(fullTableName); } http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 27efece..6df22a2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -40,8 +40,10 @@ import java.util.Queue; import java.util.Set; import java.util.TreeSet; import java.util.UUID; +import java.util.function.Supplier; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import java.util.stream.Collectors; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; @@ -63,6 +65,8 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; import 
org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.conf.HiveConf; @@ -117,6 +121,7 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity.WriteType; @@ -14522,7 +14527,33 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { QueryResultsCache.LookupInfo lookupInfo = null; String queryString = getQueryStringForCache(astNode); if (queryString != null) { - lookupInfo = new QueryResultsCache.LookupInfo(queryString); + lookupInfo = new QueryResultsCache.LookupInfo(queryString, + new Supplier<ValidTxnWriteIdList>() { + ValidTxnWriteIdList cachedWriteIdList = null; + @Override + public ValidTxnWriteIdList get() { + if (cachedWriteIdList == null) { + // TODO: Once HIVE-18948 is in, should be able to retrieve writeIdList from the conf. 
+ //cachedWriteIdList = AcidUtils.getValidTxnWriteIdList(conf); + // + List<String> transactionalTables = tablesFromReadEntities(inputs) + .stream() + .filter(table -> AcidUtils.isTransactionalTable(table)) + .map(table -> table.getFullyQualifiedName()) + .collect(Collectors.toList()); + try { + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + cachedWriteIdList = + getTxnMgr().getValidWriteIds(transactionalTables, txnString); + } catch (Exception err) { + String msg = "Error while getting the txnWriteIdList for tables " + transactionalTables + + " and validTxnList " + conf.get(ValidTxnList.VALID_TXNS_KEY); + throw new RuntimeException(msg, err); + } + } + return cachedWriteIdList; + } + }); } return lookupInfo; } @@ -14620,9 +14651,31 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { return false; } + if (!conf.getBoolVar(ConfVars.HIVE_QUERY_RESULTS_CACHE_NONTRANSACTIONAL_TABLES_ENABLED)) { + List<Table> nonTransactionalTables = getNonTransactionalTables(); + if (nonTransactionalTables.size() > 0) { + LOG.info("Not eligible for results caching - query contains non-transactional tables {}", + nonTransactionalTables); + return false; + } + } return true; } + private static Set<Table> tablesFromReadEntities(Set<ReadEntity> readEntities) { + return readEntities.stream() + .filter(entity -> entity.getType() == Entity.Type.TABLE) + .map(entity -> entity.getTable()) + .collect(Collectors.toSet()); + } + + private List<Table> getNonTransactionalTables() { + return tablesFromReadEntities(inputs) + .stream() + .filter(table -> !AcidUtils.isTransactionalTable(table)) + .collect(Collectors.toList()); + } + /** * Check the query results cache to see if the query represented by the lookupInfo can be * answered using the results cache. 
If the cache contains a suitable entry, the semantic analyzer http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_1.q b/ql/src/test/queries/clientpositive/results_cache_1.q index 4aea60e..0c85c4a 100644 --- a/ql/src/test/queries/clientpositive/results_cache_1.q +++ b/ql/src/test/queries/clientpositive/results_cache_1.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; explain select count(*) from src a join src b on (a.key = b.key); http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_2.q b/ql/src/test/queries/clientpositive/results_cache_2.q index 96a9092..034ec18 100644 --- a/ql/src/test/queries/clientpositive/results_cache_2.q +++ b/ql/src/test/queries/clientpositive/results_cache_2.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; set hive.fetch.task.conversion=more; -- Test 1: fetch task http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_capacity.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_capacity.q b/ql/src/test/queries/clientpositive/results_cache_capacity.q index 9f54577..eeb11e1 100644 --- a/ql/src/test/queries/clientpositive/results_cache_capacity.q +++ b/ql/src/test/queries/clientpositive/results_cache_capacity.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; -- Allow results cache to hold entries up 
to 125 bytes -- The single row queries are small enough to fit in the cache (103 bytes) http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_empty_result.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_empty_result.q b/ql/src/test/queries/clientpositive/results_cache_empty_result.q index 6213671..f5b99b5 100644 --- a/ql/src/test/queries/clientpositive/results_cache_empty_result.q +++ b/ql/src/test/queries/clientpositive/results_cache_empty_result.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; explain select count(*), key from src a where key < 0 group by key; http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_invalidation.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_invalidation.q b/ql/src/test/queries/clientpositive/results_cache_invalidation.q new file mode 100644 index 0000000..b69bdf2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/results_cache_invalidation.q @@ -0,0 +1,89 @@ + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true'); +create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true'); + +insert into tab1 select * from default.src; +insert into tab2 select * from default.src; + +set hive.query.results.cache.enabled=true; + +set test.comment="Run queries to load into cache"; +set test.comment; + +-- Q1 +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +-- Q2 +explain +select max(key) from tab2; +select max(key) from tab2; + +-- Q3 +explain +select 
count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +set test.comment="Q1 should now be able to use cache"; +set test.comment; +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +set test.comment="Q2 should now be able to use cache"; +set test.comment; +explain +select max(key) from tab2; +select max(key) from tab2; + +set test.comment="Q3 should now be able to use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +-- Update tab1 which should invalidate Q1 and Q3. +insert into tab1 values ('88', 'val_88'); + +set test.comment="Q1 should not use cache"; +set test.comment; +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +set test.comment="Q2 should still use cache since tab2 not updated"; +set test.comment; +explain +select max(key) from tab2; +select max(key) from tab2; + +set test.comment="Q3 should not use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +-- Update tab2 which should invalidate Q2 and Q3. 
+insert into tab2 values ('88', 'val_88'); + +set test.comment="Q1 should use cache"; +set test.comment; +explain +select count(*) from tab1 a where key >= 0; +select count(*) from tab1 a where key >= 0; + +set test.comment="Q2 should not use cache"; +set test.comment; +explain +select max(key) from tab2; +select max(key) from tab2; + +set test.comment="Q3 should not use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_lifetime.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_lifetime.q b/ql/src/test/queries/clientpositive/results_cache_lifetime.q index 60ffe96..1c306e1 100644 --- a/ql/src/test/queries/clientpositive/results_cache_lifetime.q +++ b/ql/src/test/queries/clientpositive/results_cache_lifetime.q @@ -1,5 +1,6 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; set hive.query.results.cache.max.entry.lifetime=2; -- This query used the cache from results_cache_1.q. Load it up. 
http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q index 4802f43..c5684f0 100644 --- a/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q +++ b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q @@ -9,6 +9,7 @@ create table quoted1 ( insert into quoted1 select key, key, value, value from src; set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; explain select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1; http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_temptable.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_temptable.q b/ql/src/test/queries/clientpositive/results_cache_temptable.q index 9e0de76..7e16702 100644 --- a/ql/src/test/queries/clientpositive/results_cache_temptable.q +++ b/ql/src/test/queries/clientpositive/results_cache_temptable.q @@ -1,4 +1,5 @@ set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; create table rct (key string, value string); load data local inpath '../../data/files/kv1.txt' overwrite into table rct; http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_transactional.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_transactional.q b/ql/src/test/queries/clientpositive/results_cache_transactional.q new file mode 100644 index 0000000..9181c6d --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/results_cache_transactional.q @@ -0,0 +1,56 @@ + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true'); +create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true'); + +insert into tab1 select * from default.src; +insert into tab2 select * from default.src; + +set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=false; + +explain +select max(key) from tab1; +select max(key) from tab1; + +set test.comment="Query on transactional table should use cache"; +set test.comment; +explain +select max(key) from tab1; +select max(key) from tab1; + +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + +set test.comment="Join on transactional tables, should use cache"; +set test.comment; +explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); +select count(*) from tab1 join tab2 on (tab1.key = tab2.key); + + +-- Non-transactional tables + +explain +select max(key) from src; +select max(key) from src; + +set test.comment="Query on non-transactional table should not use cache"; +set test.comment; +explain +select max(key) from src; +select max(key) from src; + +explain +select count(*) from tab1 join src on (tab1.key = src.key); +select count(*) from tab1 join src on (tab1.key = src.key); + +set test.comment="Join uses non-transactional table, should not use cache"; +set test.comment; +explain +select count(*) from tab1 join src on (tab1.key = src.key); +select count(*) from tab1 join src on (tab1.key = src.key); + http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/queries/clientpositive/results_cache_with_masking.q ---------------------------------------------------------------------- 
diff --git a/ql/src/test/queries/clientpositive/results_cache_with_masking.q b/ql/src/test/queries/clientpositive/results_cache_with_masking.q index b4fcdd5..d353598 100644 --- a/ql/src/test/queries/clientpositive/results_cache_with_masking.q +++ b/ql/src/test/queries/clientpositive/results_cache_with_masking.q @@ -3,6 +3,7 @@ set hive.mapred.mode=nonstrict; set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; create table masking_test as select cast(key as int) as key, value from src; http://git-wip-us.apache.org/repos/asf/hive/blob/7948def1/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out b/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out new file mode 100644 index 0000000..c76de92 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out @@ -0,0 +1,793 @@ +PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab1 +POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab1 +PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab2 +POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: 
Output: database:default +POSTHOOK: Output: default@tab2 +PREHOOK: query: insert into tab1 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into tab2 select * from default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tab2 +POSTHOOK: query: insert into tab2 select * from default.src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +test.comment="Run queries to load into cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) >= 0.0D) (type: boolean) + Statistics: Num rows: 30 Data size: 5338 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 30 Data size: 5338 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +500 +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: 
NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE 
+ Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 95 Data size: 17028 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + 
aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +test.comment="Q1 should now be able to use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +500 +test.comment="Q2 should now be able to use cache" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: 
select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +98 +test.comment="Q3 should now be able to use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +1028 +PREHOOK: query: insert into tab1 values ('88', 'val_88') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tab1 +POSTHOOK: query: insert into tab1 values ('88', 'val_88') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SCRIPT [] +POSTHOOK: Lineage: tab1.value SCRIPT [] +test.comment="Q1 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
(UDFToDouble(key) >= 0.0D) (type: boolean) + Statistics: Num rows: 36 Data size: 6383 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 36 Data size: 6383 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +#### A masked pattern was here #### +501 +test.comment="Q2 should still use cache since tab2 not updated" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select max(key) 
from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +98 +test.comment="Q3 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + 
sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 115 Data size: 20478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1028 +PREHOOK: query: insert into tab2 values ('88', 'val_88') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: 
default@tab2 +POSTHOOK: query: insert into tab2 values ('88', 'val_88') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab2 +POSTHOOK: Lineage: tab2.key SCRIPT [] +POSTHOOK: Lineage: tab2.value SCRIPT [] +test.comment="Q1 should use cache" +PREHOOK: query: explain +select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from tab1 a where key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +POSTHOOK: query: select count(*) from tab1 a where key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +501 +test.comment="Q2 should not use cache" +PREHOOK: query: explain +select max(key) from tab2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(key) from tab2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: may be 
used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(key) from tab2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from tab2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +98 +test.comment="Q3 should not use cache" +PREHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab1 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: tab2 + Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 115 Data size: 20478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Input: default@tab2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Input: default@tab2 +#### A masked pattern was here #### +1029