This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f2d29ef HIVE-25768: Extend lifetime of query-level HMS response cache
(John Sherman, reviewed by Stephen Carlin, Krisztian Kasa)
f2d29ef is described below
commit f2d29efb557016a04987ab352c2c9609475f84d3
Author: John Sherman <[email protected]>
AuthorDate: Fri Dec 10 02:52:16 2021 -0800
HIVE-25768: Extend lifetime of query-level HMS response cache (John Sherman, reviewed by Stephen Carlin, Krisztian Kasa)
---
.../apache/hadoop/hive/ql/DriverTxnHandler.java | 3 ++
.../java/org/apache/hadoop/hive/ql/Executor.java | 4 +++
.../java/org/apache/hadoop/hive/ql/QueryState.java | 41 +++++++++++++++++++++-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 22 +-----------
.../hadoop/hive/ql/session/SessionState.java | 29 +++------------
5 files changed, 53 insertions(+), 46 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
index a6d3481..09fc767 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
@@ -224,6 +224,8 @@ class DriverTxnHandler {
}
try {
+ // Ensure we answer any metadata calls with fresh responses
+ driverContext.getQueryState().disableHMSCache();
setWriteIdForAcidFileSinks();
allocateWriteIdForAcidAnalyzeTable();
boolean hasAcidDdl = setWriteIdForAcidDdl();
@@ -245,6 +247,7 @@ class DriverTxnHandler {
throw DriverUtils.createProcessorException(driverContext, 10,
errorMessage, ErrorMsg.findSQLState(e.getMessage()),
e);
} finally {
+ driverContext.getQueryState().enableHMSCache();
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java
index ce47d17..6b5ee95 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java
@@ -105,7 +105,10 @@ public class Executor {
preExecutionActions();
preExecutionCacheActions();
+ // Disable HMS cache so any metadata calls during execution get fresh responses.
+ driverContext.getQueryState().disableHMSCache();
runTasks(noName);
+ driverContext.getQueryState().enableHMSCache();
postExecutionCacheActions();
postExecutionActions();
} catch (CommandProcessorException cpe) {
@@ -118,6 +121,7 @@ public class Executor {
handleException(hookContext, e);
} finally {
cleanUp(noName, hookContext, executionError);
+ driverContext.getQueryState().enableHMSCache();
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java
index 5edebca..b39037c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java
@@ -30,12 +30,17 @@ import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.tez.dag.api.TezConfiguration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* The class to store query level info such as queryId. Multiple queries can run
* in the same session, so SessionState is to hold common session related info, and
* each QueryState is to hold query related info.
*/
public class QueryState {
+ private static final Logger LOG = LoggerFactory.getLogger(QueryState.class);
+
/**
* current configuration.
*/
@@ -68,6 +73,17 @@ public class QueryState {
private final Map<String, Object> resourceMap = new HashMap<>();
/**
+ * Cache of HMS requests/responses utilized by SessionHiveMetaStoreClient.
+ */
+ private Map<Object, Object> hmsCache;
+
+ /**
+ * Tracks if HMS cache should be used to answer metadata requests.
+ * In some sections, it makes sense to disable the cache to get fresh responses.
+ */
+ private boolean hmsCacheEnabled;
+
+ /**
* query level lock for ConditionalTask#resolveTask.
*/
private final ReentrantLock resolveConditionalTaskLock = new
ReentrantLock(true);
@@ -82,13 +98,36 @@ public class QueryState {
// Get the query id stored in query specific config.
public String getQueryId() {
- return (queryConf.getVar(HiveConf.ConfVars.HIVEQUERYID));
+ return queryConf.getVar(HiveConf.ConfVars.HIVEQUERYID);
}
public String getQueryString() {
return queryConf.getQueryString();
}
+ // Returns the HMS cache if it is currently enabled
+ public Map<Object, Object> getHMSCache() {
+ return hmsCacheEnabled ? hmsCache : null;
+ }
+
+ /**
+ * Disable the HMS cache. Useful in situations when you
+ * must not get cached metadata responses.
+ */
+ public void disableHMSCache() {
+ hmsCacheEnabled = false;
+ }
+
+ public void enableHMSCache() {
+ hmsCacheEnabled = true;
+ }
+
+ public void createHMSCache() {
+ LOG.info("Query-level HMS cache created for {}", getQueryId());
+ hmsCache = new HashMap<>();
+ hmsCacheEnabled = true;
+ }
+
public String getCommandType() {
if (commandType == null) {
return null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 77aa78f..cb60d6c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -15361,28 +15361,8 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
@Override
public void startAnalysis() {
- String queryId = conf.getVar(HiveConf.ConfVars.HIVEQUERYID);
- SessionState ss = SessionState.get();
- if (ss == null) {
- LOG.info("No current SessionState, skipping metadata query-level caching
for: {}", queryId);
- return;
- }
- if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_HMS_QUERY_CACHE_ENABLED)) {
- LOG.info("Starting caching scope for: {}", queryId);
- ss.startScope(queryId);
- }
- }
-
- @Override
- public void endAnalysis() {
- SessionState ss = SessionState.get();
- if (ss == null) {
- return;
- }
if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_HMS_QUERY_CACHE_ENABLED)) {
- String queryId = conf.getVar(HiveConf.ConfVars.HIVEQUERYID);
- LOG.info("Ending caching scope for: {}", queryId);
- ss.endScope(queryId);
+ queryState.createHMSCache();
}
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 18bbd9e..5227d90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -278,11 +278,6 @@ public class SessionState implements ISessionAuthState{
*/
private Map<URI, HadoopShims.HdfsEncryptionShim> hdfsEncryptionShims =
Maps.newHashMap();
- /**
- * Cache for Erasure Coding shims.
- */
- private Map<URI, HadoopShims.HdfsErasureCodingShim> erasureCodingShims;
-
private final String userName;
/**
@@ -2207,30 +2202,16 @@ public class SessionState implements ISessionAuthState{
}
/**
- * Can be called when we start compilation of a query.
- * @param queryId the unique identifier of the query
- */
- public void startScope(String queryId) {
- Map<Object, Object> existingVal = cache.put(queryId, new HashMap<>());
- Preconditions.checkState(existingVal == null);
- }
-
- /**
- * Can be called when we end compilation of a query.
- * @param queryId the unique identifier of the query
- */
- public void endScope(String queryId) {
- Map<Object, Object> existingVal = cache.remove(queryId);
- Preconditions.checkState(existingVal != null);
- }
-
- /**
* Retrieves the query cache for the given query.
* @param queryId the unique identifier of the query
* @return the cache for the query
*/
public Map<Object, Object> getQueryCache(String queryId) {
- return cache.get(queryId);
+ QueryState qs = getQueryState(queryId);
+ if (qs == null) {
+ return null;
+ }
+ return qs.getHMSCache();
}
public Hive getHiveDb() throws HiveException {