This is an automated email from the ASF dual-hosted git repository. abstractdog pushed a commit to branch branch-0.10.3 in repository https://gitbox.apache.org/repos/asf/tez.git
commit cbac6942f1eefac4eabbdd709c01ff6cf1c2c55a
Author: Sourabh Badhya <iamsbad...@gmail.com>
AuthorDate: Tue Jan 9 21:16:05 2024 +0530

    TEZ-4526: Avoid calling LocationProvider#getPreferredLocations multiple times while generating grouped splits (#323)
---
 .../java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java
index b4143494f..176eb1a10 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java
@@ -208,10 +208,12 @@ public abstract class TezSplitGrouper {
 
     long totalLength = 0;
     Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
+    Map<SplitContainer, String[]> splitToLocationsMap = new HashMap<>(originalSplits.size());
     // go through splits and add them to locations
     for (SplitContainer split : originalSplits) {
       totalLength += estimator.getEstimatedSize(split);
       String[] locations = locationProvider.getPreferredLocations(split);
+      splitToLocationsMap.put(split, locations);
       if (locations == null || locations.length == 0) {
         locations = emptyLocations;
         allSplitsHaveLocalhost = false;
@@ -293,7 +295,7 @@ public abstract class TezSplitGrouper {
       groupedSplits = new ArrayList<GroupedSplitContainer>(originalSplits.size());
       for (SplitContainer split : originalSplits) {
         GroupedSplitContainer newSplit =
-            new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(locationProvider.getPreferredLocations(split)),
+            new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(splitToLocationsMap.get(split)),
                 null);
         newSplit.addSplit(split);
         groupedSplits.add(newSplit);
@@ -314,7 +316,7 @@ public abstract class TezSplitGrouper {
       Set<String> locSet = new HashSet<String>();
       for (SplitContainer split : originalSplits) {
         locSet.clear();
-        String[] locations = locationProvider.getPreferredLocations(split);
+        String[] locations = splitToLocationsMap.get(split);
         if (locations == null || locations.length == 0) {
           locations = emptyLocations;
         }
@@ -408,7 +410,7 @@ public abstract class TezSplitGrouper {
           groupLocation = null;
         } else if (doingRackLocal) {
           for (SplitContainer splitH : group) {
-            String[] locations = locationProvider.getPreferredLocations(splitH);
+            String[] locations = splitToLocationsMap.get(splitH);
             if (locations != null) {
               for (String loc : locations) {
                 if (loc != null) {
@@ -503,7 +505,7 @@ public abstract class TezSplitGrouper {
             }
             numRackSplitsToGroup--;
             rackSet.clear();
-            String[] locations = locationProvider.getPreferredLocations(split);
+            String[] locations = splitToLocationsMap.get(split);
             if (locations == null || locations.length == 0) {
               locations = emptyLocations;
             }