Author: daijy
Date: Thu Sep 17 02:38:59 2015
New Revision: 1703481

URL: http://svn.apache.org/r1703481
Log:
PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
    pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1703481&r1=1703480&r2=1703481&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Sep 17 02:38:59 2015
@@ -40,6 +40,8 @@ PIG-4639: Add better parser for Apache H
 
 BUG FIXES
 
+PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754 (daijy)
+
 PIG-4315: MergeJoin or Split followed by order by gives NPE in Tez (rohini)
 
 PIG-4654: Reduce tez memory.reserve-fraction and clear spillables for better memory utilization (rohini)

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java?rev=1703481&r1=1703480&r2=1703481&view=diff
==============================================================================
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
 Thu Sep 17 02:38:59 2015
@@ -137,11 +137,11 @@ public class InputSizeReducerEstimator i
                             }
                         } else {
                             // If file is not found, we should report -1
-                            return -1;
+                            continue;
                         }
                     } else {
                         // If we cannot estimate size of a location, we should report -1
-                        return -1;
+                        continue;
                     }
                 }
             }

Modified: 
pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java?rev=1703481&r1=1703480&r2=1703481&view=diff
==============================================================================
--- 
pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
 (original)
+++ 
pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
 Thu Sep 17 02:38:59 2015
@@ -38,30 +38,26 @@ public class TestInputSizeReducerEstimat
     @Test
     public void testGetInputSizeFromFs() throws Exception {
         long size = 2L * 1024 * 1024 * 1024;
+        POLoad load1 = createPOLoadWithSize(size, new PigStorage());
+        POLoad load2 = createPOLoadWithSize(size, new 
PigStorageWithStatistics());
         Assert.assertEquals(size, 
InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF, Lists.newArrayList(createPOLoadWithSize(size, new 
PigStorage())),
-                new org.apache.hadoop.mapreduce.Job(CONF)));
+                CONF, Lists.newArrayList(load1), new 
org.apache.hadoop.mapreduce.Job(CONF)));
 
         Assert.assertEquals(size, 
InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF,
-                Lists.newArrayList(createPOLoadWithSize(size, new 
PigStorageWithStatistics())),
-                new org.apache.hadoop.mapreduce.Job(CONF)));
+                CONF, Lists.newArrayList(load2), new 
org.apache.hadoop.mapreduce.Job(CONF)));
 
         Assert.assertEquals(size * 2, 
InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF,
-                Lists.newArrayList(
-                        createPOLoadWithSize(size, new PigStorage()),
-                        createPOLoadWithSize(size, new 
PigStorageWithStatistics())),
-                        new org.apache.hadoop.mapreduce.Job(CONF)));
+                CONF, Lists.newArrayList(load1, load2), new 
org.apache.hadoop.mapreduce.Job(CONF)));
 
         // Negative test - PIG-3754
-        POLoad poLoad = createPOLoadWithSize(size, new PigStorage());
-        poLoad.setLFile(new FileSpec("hbase://users", null));
+        load1.setLFile(new FileSpec("hbase://users", null));
 
-        Assert.assertEquals(-1, 
InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF,
-                Collections.singletonList(poLoad),
-                new org.apache.hadoop.mapreduce.Job(CONF)));
+        Assert.assertEquals(0, InputSizeReducerEstimator.getTotalInputFileSize(
+                CONF, Collections.singletonList(load1), new 
org.apache.hadoop.mapreduce.Job(CONF)));
+
+        // Skip non-hdfs input - PIG-4679
+        Assert.assertEquals(size, 
InputSizeReducerEstimator.getTotalInputFileSize(
+                CONF, Lists.newArrayList(load1, load2), new 
org.apache.hadoop.mapreduce.Job(CONF)));
     }
 
     @Test


Reply via email to