This is an automated email from the ASF dual-hosted git repository.

abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 9a9e9a3f277 HIVE-27937: Clarifying comments around tez container size 
(#4920) (Laszlo Bodor reviewed by Stamatis Zampetakis, Denys Kuzmenko)
9a9e9a3f277 is described below

commit 9a9e9a3f277b14d856b2ebc00da9b89604e74130
Author: Bodor Laszlo <[email protected]>
AuthorDate: Mon Jan 8 10:02:23 2024 +0100

    HIVE-27937: Clarifying comments around tez container size (#4920) (Laszlo 
Bodor reviewed by Stamatis Zampetakis, Denys Kuzmenko)
---
 .../src/java/org/apache/hadoop/hive/conf/HiveConf.java   | 15 ++++++++++++++-
 data/conf/iceberg/llap/hive-site.xml                     |  8 +++++---
 data/conf/iceberg/llap/tez-site.xml                      |  4 ----
 data/conf/iceberg/tez/hive-site.xml                      |  3 +--
 data/conf/iceberg/tez/tez-site.xml                       |  4 ----
 data/conf/llap/hive-site.xml                             |  4 +++-
 data/conf/llap/tez-site.xml                              |  4 ----
 data/conf/tez/tez-site.xml                               |  4 ----
 .../org/apache/hadoop/hive/ql/exec/tez/DagUtils.java     | 16 +++++++++-------
 .../hive/ql/exec/tez/TezAvailableSlotsCalculator.java    |  9 ++++++++-
 10 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index cbe91a509ff..9a1433dc21d 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2461,7 +2461,20 @@ public class HiveConf extends Configuration {
         "The default input format for tez. Tez groups splits in the AM."),
 
     HIVE_TEZ_CONTAINER_SIZE("hive.tez.container.size", -1,
-        "By default Tez will spawn containers of the size of a mapper. This 
can be used to overwrite."),
+        "The memory in MB that's used by a Tez task container (TezChild) in 
Tez container mode. Hive uses this \n"
+        + "property to create a Resource object which is accepted by Yarn (and 
used in TezAM to ask for TezChild \n"
+        + "containers). This should be distinguished from the Tez AM's 
(DAGAppMaster) memory, \n"
+        + "which is driven by tez.am.resource.memory.mb! \n"
+        + "Also, as Hive takes care of TezChild memory by setting this option, 
there is no need \n "
+        + "to set tez.task.resource.memory.mb differently. \n"
+        + "The final -Xmx arg for TezChild process is not equal to this 
setting, \n "
+        + "because Tez considers a heap fraction (80%), so by default: \n"
+        + "Xmx = hive.tez.container.size * 
tez.container.max.java.heap.fraction. \n"
+        + "In case of values <= 0, container size falls back to 
mapreduce.map.memory.mb. \n"
+        + "LLAP notes: while generating splits, the needed per-task resource 
is derived from this option \n"
+        + "(refer to HiveSplitGenerator, TezAvailableSlotsCalculator), so even 
if its value doesn't change the \n"
+        + "LLAP daemons' total physical size, it has to be configured 
properly. In this context \n"
+        + "4096 implies that you assume a single task will consume 4096MB from 
a daemon's shared heap."),
     HIVE_TEZ_CPU_VCORES("hive.tez.cpu.vcores", -1,
         "By default Tez will ask for however many cpus map-reduce is 
configured to use per container.\n" +
         "This can be used to overwrite."),
diff --git a/data/conf/iceberg/llap/hive-site.xml 
b/data/conf/iceberg/llap/hive-site.xml
index 57982980abf..fb941d991c4 100644
--- a/data/conf/iceberg/llap/hive-site.xml
+++ b/data/conf/iceberg/llap/hive-site.xml
@@ -38,10 +38,12 @@
         <description>A base for other temporary directories.</description>
     </property>
 
+    <!-- in LLAP mode hive.tez.container.size isn't used to actually determine 
container size, however while -->
+    <!-- calculating available slots (in split generation) it's used 
through -->
+    <!-- the Vertex resource, so this has to be defined in order to get 
consistent test results -->
     <property>
-        <name>hive.tez.container.size</name>
-        <value>128</value>
-        <description></description>
+      <name>hive.tez.container.size</name>
+      <value>128</value>
     </property>
 
     <property>
diff --git a/data/conf/iceberg/llap/tez-site.xml 
b/data/conf/iceberg/llap/tez-site.xml
index fff6c875620..b96019bb37f 100644
--- a/data/conf/iceberg/llap/tez-site.xml
+++ b/data/conf/iceberg/llap/tez-site.xml
@@ -11,10 +11,6 @@
     <name>tez.runtime.io.sort.mb</name>
     <value>24</value>
   </property>
-  <property>
-    <name>hive.tez.container.size</name>
-    <value>512</value>
-  </property>
   <property>
     <name>tez.counters.max</name>
     <value>1024</value>
diff --git a/data/conf/iceberg/tez/hive-site.xml 
b/data/conf/iceberg/tez/hive-site.xml
index 0dc31fed5ed..272859dd915 100644
--- a/data/conf/iceberg/tez/hive-site.xml
+++ b/data/conf/iceberg/tez/hive-site.xml
@@ -40,8 +40,7 @@
 
 <property>
   <name>hive.tez.container.size</name>
-  <value>128</value>
-  <description></description>
+  <value>512</value>
 </property>
 
 <property>
diff --git a/data/conf/iceberg/tez/tez-site.xml 
b/data/conf/iceberg/tez/tez-site.xml
index 3c2a96f7edb..7b26d6aad3e 100644
--- a/data/conf/iceberg/tez/tez-site.xml
+++ b/data/conf/iceberg/tez/tez-site.xml
@@ -11,10 +11,6 @@
     <name>tez.runtime.io.sort.mb</name>
     <value>24</value>
   </property>
-  <property>
-    <name>hive.tez.container.size</name>
-    <value>512</value>
-  </property>
   <property>
     <name>tez.counters.max</name>
     <value>1024</value>
diff --git a/data/conf/llap/hive-site.xml b/data/conf/llap/hive-site.xml
index 27609b7a2be..fe69d81de7e 100644
--- a/data/conf/llap/hive-site.xml
+++ b/data/conf/llap/hive-site.xml
@@ -44,10 +44,12 @@
   <description>A base for other temporary directories.</description>
 </property>
 
+<!-- in LLAP mode hive.tez.container.size isn't used to actually determine 
container size, however while -->
+<!-- calculating available slots (in split generation) it's used through -->
+<!-- the Vertex resource, so this has to be defined in order to get consistent 
test results -->
 <property>
   <name>hive.tez.container.size</name>
   <value>128</value>
-  <description></description>
 </property>
 
 <property>
diff --git a/data/conf/llap/tez-site.xml b/data/conf/llap/tez-site.xml
index 6118e6edf8e..cc4177e3cbd 100644
--- a/data/conf/llap/tez-site.xml
+++ b/data/conf/llap/tez-site.xml
@@ -9,10 +9,6 @@
     <name>tez.am.resource.memory.mb</name>
     <value>128</value>
   </property>
-  <property>
-    <name>tez.task.resource.memory.mb</name>
-    <value>128</value>
-  </property>
   <property>
     <name>tez.runtime.io.sort.mb</name>
     <value>24</value>
diff --git a/data/conf/tez/tez-site.xml b/data/conf/tez/tez-site.xml
index 88adb6a57e8..ff3b468fca3 100644
--- a/data/conf/tez/tez-site.xml
+++ b/data/conf/tez/tez-site.xml
@@ -3,10 +3,6 @@
     <name>tez.am.resource.memory.mb</name>
     <value>512</value>
   </property>
-  <property>
-    <name>tez.task.resource.memory.mb</name>
-    <value>128</value>
-  </property>
   <property>
     <name>tez.runtime.io.sort.mb</name>
     <value>24</value>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index 896be0018d7..09d2ff71933 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -23,6 +23,7 @@ import java.util.concurrent.ConcurrentHashMap;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
+import com.google.common.base.Strings;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
@@ -686,7 +687,9 @@ public class DagUtils {
         cpuCores = MRJobConfig.DEFAULT_MAP_CPU_VCORES;
       }
     }
-    return Resource.newInstance(memorySizeMb, cpuCores);
+    Resource resource = Resource.newInstance(memorySizeMb, cpuCores);
+    LOG.debug("Tez container resource: {}", resource);
+    return resource;
   }
 
   /*
@@ -716,19 +719,18 @@ public class DagUtils {
     }
     logLevel = sb.toString();
 
+    String finalOpts = null;
     if (HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_TEZ_CONTAINER_SIZE) > 
0) {
-      if (javaOpts != null) {
-        return javaOpts + " " + logLevel;
-      } else  {
-        return logLevel;
-      }
+      finalOpts = Strings.nullToEmpty(javaOpts) + " " + logLevel;
     } else {
       if (javaOpts != null && !javaOpts.isEmpty()) {
         LOG.warn(HiveConf.ConfVars.HIVE_TEZ_JAVA_OPTS + " will be ignored 
because "
                  + HiveConf.ConfVars.HIVE_TEZ_CONTAINER_SIZE + " is not set!");
       }
-      return logLevel + " " + MRHelpers.getJavaOptsForMRMapper(conf);
+      finalOpts = logLevel + " " + MRHelpers.getJavaOptsForMRMapper(conf);
     }
+    LOG.debug("Tez container final opts: {}", finalOpts);
+    return finalOpts;
   }
 
   private Vertex createVertexFromMergeWork(JobConf conf, MergeJoinWork 
mergeJoinWork,
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezAvailableSlotsCalculator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezAvailableSlotsCalculator.java
index 731ad0942f8..e0117a08ba6 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezAvailableSlotsCalculator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezAvailableSlotsCalculator.java
@@ -20,11 +20,15 @@ package org.apache.hadoop.hive.ql.exec.tez;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.tez.runtime.api.InputInitializerContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Default implementation of AvailableSlotsCalculator which relies on 
available capacity of the cluster
  */
 public class TezAvailableSlotsCalculator implements AvailableSlotsCalculator {
+  private static final Logger LOG = 
LoggerFactory.getLogger(TezAvailableSlotsCalculator.class);
+
     private InputInitializerContext inputInitializerContext;
     @Override
     public void initialize(Configuration conf, HiveSplitGenerator 
splitGenerator) {
@@ -39,6 +43,9 @@ public class TezAvailableSlotsCalculator implements 
AvailableSlotsCalculator {
         }
         int totalResource = 
inputInitializerContext.getTotalAvailableResource().getMemory();
         int taskResource = 
inputInitializerContext.getVertexTaskResource().getMemory();
-        return totalResource / taskResource;
+        int availableSlots = totalResource / taskResource;;
+        LOG.debug("totalResource: {}mb / taskResource: {}mb =  availableSlots: 
{}", totalResource, taskResource,
+            availableSlots);
+        return availableSlots;
     }
 }

Reply via email to