This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d52a591d78 HIVE-23831: Make auto reducer parallelism min threshold configurable.. (#4432). (okumin, reviewed by Ayush Saxena)
7d52a591d78 is described below

commit 7d52a591d78a9b6ed967aa4bf38f2b31a138dcc1
Author: okumin <[email protected]>
AuthorDate: Mon Jun 26 14:49:18 2023 +0900

    HIVE-23831: Make auto reducer parallelism min threshold configurable.. (#4432). (okumin, reviewed by Ayush Saxena)
---
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java    | 4 ++++
 ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 196a1fc4eab..294c423d665 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4768,6 +4768,10 @@ public class HiveConf extends Configuration {
         "Turn on Tez' auto reducer parallelism feature. When enabled, Hive will still estimate data sizes\n" +
         "and set parallelism estimates. Tez will sample source vertices' output sizes and adjust the estimates at runtime as\n" +
         "necessary."),
+    TEZ_AUTO_REDUCER_PARALLELISM_MIN_THRESHOLD("hive.tez.auto.reducer.parallelism.min.threshold", 1.0f,
+        "Hive on Tez disables auto reducer parallelism if # of reducers * hive.tez.min.partition.factor is smaller\n" +
+        "than this value. This helps to avoid overhead when the potential impact of auto reducer parallelism is not\n" +
+        "significant. This is effective only when hive.tez.auto.reducer.parallelism is true."),
     TEZ_LLAP_MIN_REDUCER_PER_EXECUTOR("hive.tez.llap.min.reducer.per.executor", 0.33f,
         "If above 0, the min number of reducers for auto-parallelism for LLAP scheduling will\n" +
         "be set to this fraction of the number of executors."),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 08dbbcdd4fb..c4594036a3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -130,8 +130,8 @@ public class GenTezUtils {
       maxPartition = (maxPartition > maxReducers) ? maxReducers : maxPartition;
 
       // reduce only if the parameters are significant
-      if (minPartition < maxPartition &&
-          nReducers * minPartitionFactor >= 1.0) {
+      final float minThreshold = context.conf.getFloatVar(HiveConf.ConfVars.TEZ_AUTO_REDUCER_PARALLELISM_MIN_THRESHOLD);
+      if (minPartition < maxPartition && nReducers * minPartitionFactor >= minThreshold) {
         reduceWork.setAutoReduceParallelism(true);
 
         reduceWork.setMinReduceTasks(minPartition);

Reply via email to