This is an automated email from the ASF dual-hosted git repository.

abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tez.git


The following commit(s) were added to refs/heads/master by this push:
     new b95defcd7 TEZ-4407: Misleading split info in TezSplitGrouper logs when adjusting small splits (#202) (Stamatis Zampetakis reviewed by Laszlo Bodor)
b95defcd7 is described below

commit b95defcd7a60ed16249a0352392a9495401ea24f
Author: Stamatis Zampetakis <zabe...@gmail.com>
AuthorDate: Mon Dec 23 09:12:00 2024 +0100

    TEZ-4407: Misleading split info in TezSplitGrouper logs when adjusting small splits (#202) (Stamatis Zampetakis reviewed by Laszlo Bodor)
---
 .../tez/mapreduce/grouper/TezSplitGrouper.java     | 36 +++++++++-------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java
index 176eb1a10..067acca9e 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java
@@ -253,36 +253,28 @@ public abstract class TezSplitGrouper {
             "Invalid max/min group lengths. Required min>0, max>=min. " +
                 " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
       }
+      int newDesiredNumSplits = -1;
       if (lengthPerGroup > maxLengthPerGroup) {
         // splits too big to work. Need to override with max size.
-        int newDesiredNumSplits = (int)(totalLength/maxLengthPerGroup) + 1;
-        LOG.info("Desired splits: " + desiredNumSplits + " too small. " +
-            " Desired splitLength: " + lengthPerGroup +
-            " Max splitLength: " + maxLengthPerGroup +
-            " New desired splits: " + newDesiredNumSplits +
-            " Total length: " + totalLength +
-            " Original splits: " + originalSplits.size());
-
-        desiredNumSplits = newDesiredNumSplits;
+        newDesiredNumSplits = (int)(totalLength/maxLengthPerGroup) + 1;
       } else if (lengthPerGroup < minLengthPerGroup) {
         // splits too small to work. Need to override with size.
-        int newDesiredNumSplits = (int)(totalLength/minLengthPerGroup) + 1;
-        /**
-         * This is a workaround for systems like S3 that pass the same
-         * fake hostname for all splits.
-         */
-        if (!allSplitsHaveLocalhost) {
-          desiredNumSplits = newDesiredNumSplits;
+        newDesiredNumSplits = (int)(totalLength/minLengthPerGroup) + 1;
+        if (allSplitsHaveLocalhost) {
+          // Workaround for systems like S3 that pass the same fake hostname for all splits.
+          LOG.info("Ignore {} configuration cause all splits seem to be on localhost.", TEZ_GROUPING_SPLIT_MIN_SIZE);
+          newDesiredNumSplits = desiredNumSplits;
         }
-
-        LOG.info("Desired splits: " + desiredNumSplits + " too large. " +
-            " Desired splitLength: " + lengthPerGroup +
+      }
+      if (newDesiredNumSplits != -1) {
+        LOG.info("Desired splitLength " + lengthPerGroup + " exceeds min/max bounds. " +
             " Min splitLength: " + minLengthPerGroup +
-            " New desired splits: " + newDesiredNumSplits +
-            " Final desired splits: " + desiredNumSplits +
-            " All splits have localhost: " + allSplitsHaveLocalhost +
+            " Max splitLength: " + maxLengthPerGroup +
+            " Desired splits: " + desiredNumSplits +
+            " New Desired splits: " + newDesiredNumSplits +
             " Total length: " + totalLength +
             " Original splits: " + originalSplits.size());
+        desiredNumSplits = newDesiredNumSplits;
       }
     }
 

Reply via email to