This is an automated email from the ASF dual-hosted git repository.
dengzh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new e0457688cac HIVE-29330: HiveMetaStoreAuthorizer creating new HiveConf per thread increases overhead (#6205)
e0457688cac is described below
commit e0457688cacf0825d6d37d14824085c88deb5cbc
Author: dengzh <[email protected]>
AuthorDate: Fri Dec 19 08:41:35 2025 +0800
HIVE-29330: HiveMetaStoreAuthorizer creating new HiveConf per thread increases overhead (#6205)
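For context, the overhead in the subject comes from HiveConf's re-loading constructors: building a HiveConf from an existing Configuration with "new HiveConf(conf, cls)" re-parses hive-site.xml (and the other resource files) even though the source Configuration already holds every property. A minimal, illustrative sketch of the per-thread cost this patch removes (class name and timing output are hypothetical, not from the patch):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;

    public class PerThreadConfCost {
      public static void main(String[] args) {
        Configuration base = new HiveConf(); // parses hive-site.xml once

        long start = System.nanoTime();
        // Old pattern: each authorizer thread built its own HiveConf this way,
        // triggering a full property re-load from the resource files.
        HiveConf perThread = new HiveConf(base, PerThreadConfCost.class);
        System.out.printf("re-load took %d ms%n",
            (System.nanoTime() - start) / 1_000_000);
      }
    }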
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 28 ++++++++++++
.../table/execute/AlterTableExecuteAnalyzer.java | 29 +++++-------
.../AlterTableCreateSnapshotRefAnalyzer.java | 5 +--
.../rules/views/HiveMaterializedViewUtils.java | 2 +-
.../optimizer/physical/GenMRSkewJoinProcessor.java | 3 +-
.../plugin/HiveMetastoreClientFactoryImpl.java | 9 +++-
.../plugin/metastore/HiveMetaStoreAuthorizer.java | 51 +++++++---------------
.../hadoop/hive/ql/session/SessionState.java | 2 +-
.../hive/ql/txn/compactor/CompactorThread.java | 2 +-
.../org/apache/hadoop/hive/ql/udf/UDFUtils.java | 4 +-
10 files changed, 70 insertions(+), 65 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 966a97150f5..48eecfac4ad 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -6551,6 +6551,19 @@ public HiveConf(Configuration other, Class<?> cls) {
initialize(cls);
}
+ /**
+ * For internal use only. Assumes the "other" Configuration has already loaded all the
+ * properties it intends to use, and wraps it as a HiveConf without re-loading them
+ * from the source files.
+ * @param other The Configuration whose properties are to be wrapped by this HiveConf.
+ */
+ private HiveConf(Configuration other) {
+ super(other);
+ setupRestrictList();
+ hiddenSet.addAll(HiveConfUtil.getHiddenSet(other));
+ lockedSet.addAll(HiveConfUtil.getLockedSet(other));
+ origProp = getProperties(other);
+ }
+
/**
* Copy constructor
*/
@@ -7313,4 +7326,19 @@ public void syncFromConf(HiveConf conf) {
set(e.getKey(), e.getValue());
}
}
+
+ /**
+ * Use when the given configuration already contains all the information we need,
+ * but we want a HiveConf view of it without loading the properties from the
+ * source files again, which is wasteful and can cost dozens of milliseconds.
+ * @param configuration The original configuration
+ * @return A HiveConf wrapping the original configuration
+ */
+ public static HiveConf cloneConf(Configuration configuration) {
+ if (configuration instanceof HiveConf config) {
+ return new HiveConf(config);
+ } else {
+ return new HiveConf(configuration);
+ }
+ }
}
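For illustration, a hedged usage sketch of the new factory method (the wrapper class and method below are hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;

    class CloneConfUsage {
      // Wrap an already-populated Configuration without re-reading hive-site.xml.
      // If "existing" is itself a HiveConf, the new private HiveConf(Configuration)
      // constructor path is taken; either way no resource files are parsed again.
      static HiveConf asHiveConf(Configuration existing) {
        return HiveConf.cloneConf(existing);
      }
    }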
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
index 96b2bdac748..baba8353430 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
@@ -46,7 +46,6 @@
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.session.SessionState;
import java.time.ZoneId;
import java.util.List;
@@ -109,14 +108,14 @@ protected void analyzeCommand(TableName tableName, Map<String, String> partition
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
}
- private static AlterTableExecuteDesc getCherryPickDesc(TableName tableName, Map<String, String> partitionSpec,
+ private AlterTableExecuteDesc getCherryPickDesc(TableName tableName, Map<String, String> partitionSpec,
ASTNode childNode) throws SemanticException {
long snapshotId = Long.parseLong(childNode.getText());
AlterTableExecuteSpec spec = new AlterTableExecuteSpec(CHERRY_PICK, new CherryPickSpec(snapshotId));
return new AlterTableExecuteDesc(tableName, partitionSpec, spec);
}
- private static AlterTableExecuteDesc getFastForwardDesc(TableName tableName, Map<String, String> partitionSpec,
+ private AlterTableExecuteDesc getFastForwardDesc(TableName tableName, Map<String, String> partitionSpec,
ASTNode command) throws SemanticException {
String branchName;
String targetBranchName;
@@ -135,7 +134,7 @@ private static AlterTableExecuteDesc getFastForwardDesc(TableName tableName, Map
return new AlterTableExecuteDesc(tableName, partitionSpec, spec);
}
- private static AlterTableExecuteDesc getSetCurrentSnapshotDesc(TableName tableName, Map<String, String> partitionSpec,
+ private AlterTableExecuteDesc getSetCurrentSnapshotDesc(TableName tableName, Map<String, String> partitionSpec,
ASTNode childNode) throws SemanticException {
AlterTableExecuteSpec<AlterTableExecuteSpec.SetCurrentSnapshotSpec> spec = new AlterTableExecuteSpec(SET_CURRENT_SNAPSHOT,
@@ -143,16 +142,14 @@ private static AlterTableExecuteDesc getSetCurrentSnapshotDesc(TableName tableNa
return new AlterTableExecuteDesc(tableName, partitionSpec, spec);
}
- private static AlterTableExecuteDesc getExpireSnapshotDesc(TableName tableName, Map<String, String> partitionSpec,
+ private AlterTableExecuteDesc getExpireSnapshotDesc(TableName tableName, Map<String, String> partitionSpec,
List<Node> children, HiveConf conf) throws SemanticException {
AlterTableExecuteSpec<ExpireSnapshotsSpec> spec;
if (children.size() == 1) {
spec = new AlterTableExecuteSpec(EXPIRE_SNAPSHOT, null);
return new AlterTableExecuteDesc(tableName, partitionSpec, spec);
}
- ZoneId timeZone = SessionState.get() == null ?
- new HiveConf().getLocalTimeZone() :
- SessionState.get().getConf().getLocalTimeZone();
+ ZoneId timeZone = conf.getLocalTimeZone();
ASTNode firstNode = (ASTNode) children.get(1);
String firstNodeText = PlanUtils.stripQuotes(firstNode.getText().trim());
if (firstNode.getType() == KW_RETAIN) {
@@ -176,7 +173,7 @@ private static AlterTableExecuteDesc getExpireSnapshotDesc(TableName tableName,
return new AlterTableExecuteDesc(tableName, partitionSpec, spec);
}
- private static String getTimeStampString(HiveConf conf, ASTNode node, String nodeText) throws SemanticException {
+ private String getTimeStampString(HiveConf conf, ASTNode node, String nodeText) throws SemanticException {
if (node.getChildCount() > 0) {
QueryState queryState = new QueryState.Builder().withGenerateNewQueryId(false).withHiveConf(conf).build();
SemanticAnalyzer sem = (SemanticAnalyzer) SemanticAnalyzerFactory.get(queryState, node);
@@ -190,14 +187,12 @@ private static String getTimeStampString(HiveConf conf, ASTNode node, String nod
return nodeText;
}
- private static AlterTableExecuteDesc getRollbackDesc(TableName tableName, Map<String, String> partitionSpec,
+ private AlterTableExecuteDesc getRollbackDesc(TableName tableName, Map<String, String> partitionSpec,
ASTNode childNode) throws SemanticException {
AlterTableExecuteSpec<RollbackSpec> spec;
// the child must be the rollback parameter
if (childNode.getType() == HiveParser.StringLiteral) {
- ZoneId timeZone = SessionState.get() == null ?
- new HiveConf().getLocalTimeZone() :
- SessionState.get().getConf().getLocalTimeZone();
+ ZoneId timeZone = conf.getLocalTimeZone();
TimestampTZ time = TimestampTZUtil.parse(PlanUtils.stripQuotes(childNode.getText()), timeZone);
spec = new AlterTableExecuteSpec(ROLLBACK, new RollbackSpec(TIME, time.toEpochMilli()));
} else {
@@ -206,7 +201,7 @@ private static AlterTableExecuteDesc getRollbackDesc(TableName tableName, Map<St
return new AlterTableExecuteDesc(tableName, partitionSpec, spec);
}
- private static AlterTableExecuteDesc getDeleteOrphanFilesDesc(TableName tableName, Map<String, String> partitionSpec,
+ private AlterTableExecuteDesc getDeleteOrphanFilesDesc(TableName tableName, Map<String, String> partitionSpec,
List<Node> children) throws SemanticException {
long time = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(3);
@@ -217,11 +212,9 @@ private static AlterTableExecuteDesc getDeleteOrphanFilesDesc(TableName tableNam
return new AlterTableExecuteDesc(tableName, partitionSpec, spec);
}
- private static long getTimeStampMillis(ASTNode childNode) {
+ private long getTimeStampMillis(ASTNode childNode) {
String childNodeText = PlanUtils.stripQuotes(childNode.getText());
- ZoneId timeZone = SessionState.get() == null ?
- new HiveConf().getLocalTimeZone() :
- SessionState.get().getConf().getLocalTimeZone();
+ ZoneId timeZone = conf.getLocalTimeZone();
TimestampTZ time = TimestampTZUtil.parse(PlanUtils.stripQuotes(childNodeText), timeZone);
return time.toEpochMilli();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/snapshotref/AlterTableCreateSnapshotRefAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/snapshotref/AlterTableCreateSnapshotRefAnalyzer.java
index b4ec5380e3a..7e89c36275f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/snapshotref/AlterTableCreateSnapshotRefAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/snapshotref/AlterTableCreateSnapshotRefAnalyzer.java
@@ -25,7 +25,6 @@
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.common.type.TimestampTZ;
import org.apache.hadoop.hive.common.type.TimestampTZUtil;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.ddl.DDLUtils;
import org.apache.hadoop.hive.ql.ddl.DDLWork;
@@ -39,7 +38,6 @@
import org.apache.hadoop.hive.ql.parse.AlterTableSnapshotRefSpec;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.session.SessionState;
public abstract class AlterTableCreateSnapshotRefAnalyzer extends AbstractAlterTableAnalyzer {
protected AlterTableType alterTableType;
@@ -72,8 +70,7 @@ protected void analyzeCommand(TableName tableName, Map<String, String> partition
snapshotId = Long.parseLong(childNode.getChild(0).getText());
break;
case HiveParser.TOK_AS_OF_TIME:
- ZoneId timeZone = SessionState.get() == null ? new HiveConf().getLocalTimeZone() :
- SessionState.get().getConf().getLocalTimeZone();
+ ZoneId timeZone = conf.getLocalTimeZone();
TimestampTZ ts = TimestampTZUtil.parse(stripQuotes(childNode.getChild(0).getText()), timeZone);
asOfTime = ts.toEpochMilli();
break;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java
index fb7349adfc5..d3b92d58569 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java
@@ -569,7 +569,7 @@ public static RelOptMaterialization createCTEMaterialization(String viewName, Re
hiveTable.setMaterializedTable(true);
RelOptHiveTable optTable =
new RelOptHiveTable(null, cluster.getTypeFactory(), fullName, body.getRowType(), hiveTable, columns,
- Collections.emptyList(), Collections.emptyList(), new HiveConf(), new QueryTables(true), new HashMap<>(),
+ Collections.emptyList(), Collections.emptyList(), conf, new QueryTables(true), new HashMap<>(),
new HashMap<>(), new AtomicInteger());
optTable.setRowCount(cluster.getMetadataQuery().getRowCount(body));
final TableScan scan =
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
index d846428f78a..f17d111baef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
@@ -319,8 +319,7 @@ public static void processSkewJoin(JoinOperator joinOp,
}
mapJoinOp.setChildOperators(childOps);
- HiveConf jc = new HiveConf(parseCtx.getConf(),
- GenMRSkewJoinProcessor.class);
+ HiveConf jc = new HiveConf(parseCtx.getConf());
newPlan.setNumMapTasks(HiveConf
.getIntVar(jc, HiveConf.ConfVars.HIVE_SKEWJOIN_MAPJOIN_NUM_MAP_TASK));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java
index 9595d7945eb..197fa1e76bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java
@@ -19,6 +19,7 @@
import org.apache.hadoop.hive.common.classification.InterfaceAudience.Private;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -29,11 +30,17 @@
@Private
public class HiveMetastoreClientFactoryImpl implements HiveMetastoreClientFactory{
+ private final HiveConf hiveConf;
+
+ public HiveMetastoreClientFactoryImpl(HiveConf conf) {
+ this.hiveConf = conf;
+ }
+
@Override
public IMetaStoreClient getHiveMetastoreClient() throws HiveAuthzPluginException {
String errMsg = "Error getting metastore client";
try {
- return Hive.get().getMSC();
+ return Hive.get(hiveConf, false).getMSC();
} catch (MetaException e) {
throw new HiveAuthzPluginException(errMsg, e);
} catch (HiveException e) {
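The factory now threads the caller's conf through to Hive.get instead of relying on the thread's ambient session conf; if memory serves, the second boolean argument to Hive.get is doRegisterAllFns, so passing false also skips re-registering permanent functions. A hedged sketch of the new call path (assumes a metastore reachable via hiveConf; not definitive wiring):

    HiveConf hiveConf = HiveConf.cloneConf(getConf());
    HiveMetastoreClientFactory factory = new HiveMetastoreClientFactoryImpl(hiveConf);
    // Delegates to Hive.get(hiveConf, false).getMSC() per the change above.
    IMetaStoreClient client = factory.getHiveMetastoreClient();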
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
index 4825ef94ee8..cd71ae1f670 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
@@ -40,9 +40,8 @@
import org.apache.hadoop.hive.metastore.api.PartitionSpec;
import org.apache.hadoop.hive.metastore.api.TableMeta;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
-import org.apache.hadoop.hive.ql.security.HiveMetastoreAuthenticationProvider;
+import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import static org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils.TablePrivilegeLookup;
import org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.events.*;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException;
@@ -82,12 +81,7 @@ public class HiveMetaStoreAuthorizer extends MetaStorePreEventListener implement
/**
* The client configuration.
*/
- private static final ThreadLocal<Map<String, Object>> cConfig = new ThreadLocal<Map<String, Object>>() {
- @Override
- protected Map<String, Object> initialValue() {
- return null;
- }
- };
+ private static final ThreadLocal<Map<String, Object>> cConfig = ThreadLocal.withInitial(() -> null);
public static void setClientConfig(Map<String, Object> map) {
cConfig.set(map);
@@ -97,24 +91,7 @@ public static Map<String, Object> getClientConfig() {
return cConfig.get();
}
- private static final ThreadLocal<Configuration> tConfig = new ThreadLocal<Configuration>() {
-
- @Override
- protected Configuration initialValue() {
- return null;
- }
- };
-
- private static final ThreadLocal<HiveMetastoreAuthenticationProvider> tAuthenticator = new ThreadLocal<HiveMetastoreAuthenticationProvider>() {
- @Override
- protected HiveMetastoreAuthenticationProvider initialValue() {
- try {
- return (HiveMetastoreAuthenticationProvider) HiveUtils.getAuthenticator(tConfig.get(), HiveConf.ConfVars.HIVE_METASTORE_AUTHENTICATOR_MANAGER);
- } catch (HiveException excp) {
- throw new IllegalStateException("Authentication provider instantiation failure", excp);
- }
- }
- };
+ private static final ThreadLocal<HiveAuthenticationProvider> tAuthenticator = ThreadLocal.withInitial(() -> null);
public HiveMetaStoreAuthorizer(Configuration config) {
super(config);
@@ -627,18 +604,20 @@ HiveMetaStoreAuthzInfo buildAuthzContext(PreEventContext preEventContext) throws
HiveAuthorizer createHiveMetaStoreAuthorizer() throws Exception {
HiveAuthorizer ret = null;
- HiveConf hiveConf = (HiveConf)tConfig.get();
- if(hiveConf == null){
- HiveConf hiveConf1 = new HiveConf(super.getConf(), HiveConf.class);
- tConfig.set(hiveConf1);
- hiveConf = hiveConf1;
- }
+ // If it's inside the HMS, getConf() should have all the properties in hive-site.xml;
+ // otherwise it at least contains the information needed to talk to the HMS,
+ // as the call is triggered from a client as a filter hook.
+ HiveConf hiveConf = HiveConf.cloneConf(getConf());
+
HiveAuthorizerFactory authorizerFactory = HiveUtils.getAuthorizerFactory(hiveConf, HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER);
-
if (authorizerFactory != null) {
- HiveMetastoreAuthenticationProvider authenticator = tAuthenticator.get();
-
+ HiveAuthenticationProvider authenticator = tAuthenticator.get();
+ if (authenticator == null) {
+ authenticator = HiveUtils.getAuthenticator(hiveConf,
+ HiveConf.ConfVars.HIVE_METASTORE_AUTHENTICATOR_MANAGER);
+ tAuthenticator.set(authenticator);
+ }
authenticator.setConf(hiveConf);
HiveAuthzSessionContext.Builder authzContextBuilder = new HiveAuthzSessionContext.Builder();
@@ -649,7 +628,7 @@ HiveAuthorizer createHiveMetaStoreAuthorizer() throws Exception {
HiveAuthzSessionContext authzSessionContext = authzContextBuilder.build();
ret = authorizerFactory
- .createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(), hiveConf, authenticator, authzSessionContext);
+ .createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(hiveConf), hiveConf, authenticator, authzSessionContext);
}
return ret;
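The authenticator is now created lazily, at most once per thread, rather than inside ThreadLocal.initialValue(); that moves the checked-exception-throwing factory call out of the initializer (the old code had to wrap it in IllegalStateException) while keeping per-thread reuse. The general shape of the pattern, as a sketch of what the hunk above does:

    private static final ThreadLocal<HiveAuthenticationProvider> CACHED =
        ThreadLocal.withInitial(() -> null);

    HiveAuthenticationProvider authenticator(HiveConf conf) throws Exception {
      HiveAuthenticationProvider a = CACHED.get();
      if (a == null) {
        // Expensive and throws a checked exception; runs at most once per thread.
        a = HiveUtils.getAuthenticator(conf,
            HiveConf.ConfVars.HIVE_METASTORE_AUTHENTICATOR_MANAGER);
        CACHED.set(a);
      }
      a.setConf(conf); // refreshed on every use, as the patch does
      return a;
    }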
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 8849be89853..d921c662ac2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -1026,7 +1026,7 @@ private synchronized void setupAuth() {
: CLIENT_TYPE.HIVECLI);
authzContextBuilder.setSessionString(getSessionId());
- authorizerV2 = authorizerFactory.createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(),
+ authorizerV2 = authorizerFactory.createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(getSessionConf()),
sessionConf, authenticator, authzContextBuilder.build());
setAuthorizerV2Config();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorThread.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorThread.java
index b3f5d2a7eeb..383b22413fe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorThread.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorThread.java
@@ -61,7 +61,7 @@ public void setConf(Configuration configuration) {
// HiveConf is moved to the standalone metastore.
//clone the conf - compactor needs to set properties in it which we don't
// want to bleed into the caller
- conf = new HiveConf(configuration, HiveConf.class);
+ conf = HiveConf.cloneConf(configuration);
}
@Override
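Worth noting: cloneConf preserves the isolation the comment above asks for, assuming Configuration's copy-constructor semantics (the property set is copied), so the compactor's writes do not bleed back into the caller's conf. A small sketch (the property name is illustrative only):

    Configuration caller = new Configuration();
    caller.set("hive.compactor.worker.threads", "4");

    HiveConf isolated = HiveConf.cloneConf(caller);
    isolated.set("hive.compactor.worker.threads", "8"); // stays local to the clone

    assert "4".equals(caller.get("hive.compactor.worker.threads"));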
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUtils.java
index 89e571aaacb..af171484575 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUtils.java
@@ -28,9 +28,11 @@
public class UDFUtils {
+ private static final HiveConf HIVE_CONF = new HiveConf();
+
public static TimestampTZ getTimestampTZFromTimestamp(Timestamp timestamp) {
ZoneId zone = ((SessionState.get() == null) ?
- new HiveConf().getLocalTimeZone() : SessionState.get().getConf().getLocalTimeZone());
+ HIVE_CONF.getLocalTimeZone() : SessionState.get().getConf().getLocalTimeZone());
return TimestampTZUtil.convert(timestamp, zone);
}
}