This is an automated email from the ASF dual-hosted git repository.

abstractdog pushed a commit to branch branch-0.10.3
in repository https://gitbox.apache.org/repos/asf/tez.git

commit cbac6942f1eefac4eabbdd709c01ff6cf1c2c55a
Author: Sourabh Badhya <iamsbad...@gmail.com>
AuthorDate: Tue Jan 9 21:16:05 2024 +0530

    TEZ-4526: Avoid calling LocationProvider#getPreferredLocations multiple times while generating grouped splits (#323)
---
 .../java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java
index b4143494f..176eb1a10 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java
@@ -208,10 +208,12 @@ public abstract class TezSplitGrouper {
 
     long totalLength = 0;
     Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
+    Map<SplitContainer, String[]> splitToLocationsMap = new HashMap<>(originalSplits.size());
     // go through splits and add them to locations
     for (SplitContainer split : originalSplits) {
       totalLength += estimator.getEstimatedSize(split);
       String[] locations = locationProvider.getPreferredLocations(split);
+      splitToLocationsMap.put(split, locations);
       if (locations == null || locations.length == 0) {
         locations = emptyLocations;
         allSplitsHaveLocalhost = false;
@@ -293,7 +295,7 @@ public abstract class TezSplitGrouper {
       groupedSplits = new ArrayList<GroupedSplitContainer>(originalSplits.size());
       for (SplitContainer split : originalSplits) {
         GroupedSplitContainer newSplit =
-            new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(locationProvider.getPreferredLocations(split)),
+            new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(splitToLocationsMap.get(split)),
                 null);
         newSplit.addSplit(split);
         groupedSplits.add(newSplit);
@@ -314,7 +316,7 @@ public abstract class TezSplitGrouper {
     Set<String> locSet = new HashSet<String>();
     for (SplitContainer split : originalSplits) {
       locSet.clear();
-      String[] locations = locationProvider.getPreferredLocations(split);
+      String[] locations = splitToLocationsMap.get(split);
       if (locations == null || locations.length == 0) {
         locations = emptyLocations;
       }
@@ -408,7 +410,7 @@ public abstract class TezSplitGrouper {
           groupLocation = null;
         } else if (doingRackLocal) {
           for (SplitContainer splitH : group) {
-            String[] locations = locationProvider.getPreferredLocations(splitH);
+            String[] locations = splitToLocationsMap.get(splitH);
             if (locations != null) {
               for (String loc : locations) {
                 if (loc != null) {
@@ -503,7 +505,7 @@ public abstract class TezSplitGrouper {
           }
           numRackSplitsToGroup--;
           rackSet.clear();
-          String[] locations = locationProvider.getPreferredLocations(split);
+          String[] locations = splitToLocationsMap.get(split);
           if (locations == null || locations.length == 0) {
             locations = emptyLocations;
           }

Reply via email to