This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 7d52a591d78 HIVE-23831: Make auto reducer parallelism min threshold
configurable. (#4432). (okumin, reviewed by Ayush Saxena)
7d52a591d78 is described below
commit 7d52a591d78a9b6ed967aa4bf38f2b31a138dcc1
Author: okumin <[email protected]>
AuthorDate: Mon Jun 26 14:49:18 2023 +0900
HIVE-23831: Make auto reducer parallelism min threshold configurable.
(#4432). (okumin, reviewed by Ayush Saxena)
---
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++++
ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java | 4 ++--
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 196a1fc4eab..294c423d665 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4768,6 +4768,10 @@ public class HiveConf extends Configuration {
"Turn on Tez' auto reducer parallelism feature. When enabled, Hive
will still estimate data sizes\n" +
"and set parallelism estimates. Tez will sample source vertices'
output sizes and adjust the estimates at runtime as\n" +
"necessary."),
+
TEZ_AUTO_REDUCER_PARALLELISM_MIN_THRESHOLD("hive.tez.auto.reducer.parallelism.min.threshold",
1.0f,
+ "Hive on Tez disables auto reducer parallelism if # of reducers *
hive.tez.min.partition.factor is smaller\n" +
+ "than this value. This helps to avoid overhead when the potential
impact of auto reducer parallelism is not\n" +
+ "significant. This is effective only when
hive.tez.auto.reducer.parallelism is true."),
TEZ_LLAP_MIN_REDUCER_PER_EXECUTOR("hive.tez.llap.min.reducer.per.executor",
0.33f,
"If above 0, the min number of reducers for auto-parallelism for LLAP
scheduling will\n" +
"be set to this fraction of the number of executors."),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 08dbbcdd4fb..c4594036a3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -130,8 +130,8 @@ public class GenTezUtils {
maxPartition = (maxPartition > maxReducers) ? maxReducers : maxPartition;
// reduce only if the parameters are significant
- if (minPartition < maxPartition &&
- nReducers * minPartitionFactor >= 1.0) {
+ final float minThreshold =
context.conf.getFloatVar(HiveConf.ConfVars.TEZ_AUTO_REDUCER_PARALLELISM_MIN_THRESHOLD);
+ if (minPartition < maxPartition && nReducers * minPartitionFactor >=
minThreshold) {
reduceWork.setAutoReduceParallelism(true);
reduceWork.setMinReduceTasks(minPartition);