Author: daijy
Date: Mon Jan  5 23:00:51 2015
New Revision: 1649687

URL: http://svn.apache.org/r1649687
Log:
PIG-2647: Split Combining drops splits with empty getLocations()

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1649687&r1=1649686&r2=1649687&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon Jan  5 23:00:51 2015
@@ -30,6 +30,8 @@ PIG-4333: Split BigData tests into multi
  
 BUG FIXES
 
+PIG-2647: Split Combining drops splits with empty getLocations() (tmwoodruff via daijy)
+
 PIG-4294: Enable unit test "TestNestedForeach" for spark (kellyzly via rohini)
 
 PIG-4282: Enable unit test "TestForEachNestedPlan" for spark (kellyzly via rohini)

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java?rev=1649687&r1=1649686&r2=1649687&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java Mon Jan  5 23:00:51 2015
@@ -472,28 +472,38 @@ public class MapRedUtil {
                 result.add(combinedSplits);
                 resultLengths.add(split.getLength());
             } else {
-                ComparableSplit csplit = new ComparableSplit(split, 
comparableSplitId++);
                 String[] locations = split.getLocations();
-                // sort the locations to stabilize the number of maps: PIG-1757
-                Arrays.sort(locations);
-                HashSet<String> locationSeen = new HashSet<String>();
-                for (String location : locations)
-                {
-                    if (!locationSeen.contains(location))
+                if (locations.length == 0) {
+                    // This split is missing blocks, or the split returned bad 
locations.
+                    // Don't try to combine.
+                    comparableSplitId++;
+                    ArrayList<InputSplit> combinedSplits = new 
ArrayList<InputSplit>();
+                    combinedSplits.add(split);
+                    result.add(combinedSplits);
+                    resultLengths.add(split.getLength());
+                } else {
+                    ComparableSplit csplit = new ComparableSplit(split, 
comparableSplitId++);
+                    // sort the locations to stabilize the number of maps: 
PIG-1757
+                    Arrays.sort(locations);
+                    HashSet<String> locationSeen = new HashSet<String>();
+                    for (String location : locations)
                     {
-                        Node node = nodeMap.get(location);
-                        if (node == null) {
-                            node = new Node();
-                            nodes.add(node);
-                            nodeMap.put(location, node);
+                        if (!locationSeen.contains(location))
+                        {
+                            Node node = nodeMap.get(location);
+                            if (node == null) {
+                                node = new Node();
+                                nodes.add(node);
+                                nodeMap.put(location, node);
+                            }
+                            node.add(csplit);
+                            csplit.add(node);
+                            locationSeen.add(location);
                         }
-                        node.add(csplit);
-                        csplit.add(node);
-                        locationSeen.add(location);
                     }
+                    lastSplit = split;
+                    size++;
                 }
-                lastSplit = split;
-                size++;
             }
         }
         /* verification code: debug purpose


Reply via email to