Author: daijy
Date: Mon Nov  2 19:52:05 2015
New Revision: 1712129

URL: http://svn.apache.org/viewvc?rev=1712129&view=rev
Log:
PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754

Modified:
    pig/branches/branch-0.15/CHANGES.txt
    pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
    pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
    pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java

Modified: pig/branches/branch-0.15/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/CHANGES.txt?rev=1712129&r1=1712128&r2=1712129&view=diff
==============================================================================
--- pig/branches/branch-0.15/CHANGES.txt (original)
+++ pig/branches/branch-0.15/CHANGES.txt Mon Nov  2 19:52:05 2015
@@ -28,6 +28,8 @@ OPTIMIZATIONS
 
 BUG FIXES
 
+PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754 (daijy)
+
 PIG-4644: PORelationToExprProject.clone() is broken (erwaman via rohini)
 
 PIG-4703: TezOperator.stores shall not ship to backend (daijy)

Modified: pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java?rev=1712129&r1=1712128&r2=1712129&view=diff
==============================================================================
--- pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java (original)
+++ pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java Mon Nov  2 19:52:05 2015
@@ -137,11 +137,11 @@ public class InputSizeReducerEstimator i
                             }
                         } else {
                             // If file is not found, we should report -1
-                            return -1;
+                            continue;
                         }
                     } else {
                         // If we cannot estimate size of a location, we should report -1
-                        return -1;
+                        continue;
                     }
                 }
             }

Modified: pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java?rev=1712129&r1=1712128&r2=1712129&view=diff
==============================================================================
--- pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java (original)
+++ pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java Mon Nov  2 19:52:05 2015
@@ -38,30 +38,26 @@ public class TestInputSizeReducerEstimat
     @Test
     public void testGetInputSizeFromFs() throws Exception {
         long size = 2L * 1024 * 1024 * 1024;
+        POLoad load1 = createPOLoadWithSize(size, new PigStorage());
+        POLoad load2 = createPOLoadWithSize(size, new PigStorageWithStatistics());
         Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF, Lists.newArrayList(createPOLoadWithSize(size, new PigStorage())),
-                new org.apache.hadoop.mapreduce.Job(CONF)));
+                CONF, Lists.newArrayList(load1), new org.apache.hadoop.mapreduce.Job(CONF)));
 
         Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF,
-                Lists.newArrayList(createPOLoadWithSize(size, new PigStorageWithStatistics())),
-                new org.apache.hadoop.mapreduce.Job(CONF)));
+                CONF, Lists.newArrayList(load2), new org.apache.hadoop.mapreduce.Job(CONF)));
 
         Assert.assertEquals(size * 2, InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF,
-                Lists.newArrayList(
-                        createPOLoadWithSize(size, new PigStorage()),
-                        createPOLoadWithSize(size, new PigStorageWithStatistics())),
-                        new org.apache.hadoop.mapreduce.Job(CONF)));
+                CONF, Lists.newArrayList(load1, load2), new org.apache.hadoop.mapreduce.Job(CONF)));
 
         // Negative test - PIG-3754
-        POLoad poLoad = createPOLoadWithSize(size, new PigStorage());
-        poLoad.setLFile(new FileSpec("hbase://users", null));
+        load1.setLFile(new FileSpec("hbase://users", null));
 
-        Assert.assertEquals(-1, InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF,
-                Collections.singletonList(poLoad),
-                new org.apache.hadoop.mapreduce.Job(CONF)));
+        Assert.assertEquals(0, InputSizeReducerEstimator.getTotalInputFileSize(
+                CONF, Collections.singletonList(load1), new org.apache.hadoop.mapreduce.Job(CONF)));
+
+        // Skip non-hdfs input - PIG-4679
+        Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
+                CONF, Lists.newArrayList(load1, load2), new org.apache.hadoop.mapreduce.Job(CONF)));
     }
 
     @Test

Modified: pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java?rev=1712129&r1=1712128&r2=1712129&view=diff
==============================================================================
--- pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java (original)
+++ pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java Mon Nov  2 19:52:05 2015
@@ -251,7 +251,7 @@ abstract public class TestJobSubmission
         jc=jcc.compile(mrPlan, "Test");
         job = jc.getWaitingJobs().get(0);
 
-        Util.assertParallelValues(-1, -1, -1, 1, job.getJobConf());
+        Util.assertParallelValues(-1, -1, 1, 1, job.getJobConf());
 
         util.deleteTable(Bytes.toBytesBinary("test_table"));
         // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()


Reply via email to