Author: daijy
Date: Mon Nov 2 19:52:05 2015
New Revision: 1712129
URL: http://svn.apache.org/viewvc?rev=1712129&view=rev
Log:
PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754
Modified:
pig/branches/branch-0.15/CHANGES.txt
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java
Modified: pig/branches/branch-0.15/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.15/CHANGES.txt?rev=1712129&r1=1712128&r2=1712129&view=diff
==============================================================================
--- pig/branches/branch-0.15/CHANGES.txt (original)
+++ pig/branches/branch-0.15/CHANGES.txt Mon Nov 2 19:52:05 2015
@@ -28,6 +28,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754 (daijy)
+
PIG-4644: PORelationToExprProject.clone() is broken (erwaman via rohini)
PIG-4703: TezOperator.stores shall not ship to backend (daijy)
Modified:
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java?rev=1712129&r1=1712128&r2=1712129&view=diff
==============================================================================
--- pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java (original)
+++ pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java Mon Nov 2 19:52:05 2015
@@ -137,11 +137,11 @@ public class InputSizeReducerEstimator i
}
} else {
// If file is not found, we should report -1
- return -1;
+ continue;
}
} else {
// If we cannot estimate size of a location, we should report -1
- return -1;
+ continue;
}
}
}
Modified:
pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java?rev=1712129&r1=1712128&r2=1712129&view=diff
==============================================================================
--- pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java (original)
+++ pig/branches/branch-0.15/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java Mon Nov 2 19:52:05 2015
@@ -38,30 +38,26 @@ public class TestInputSizeReducerEstimat
@Test
public void testGetInputSizeFromFs() throws Exception {
long size = 2L * 1024 * 1024 * 1024;
+ POLoad load1 = createPOLoadWithSize(size, new PigStorage());
+ POLoad load2 = createPOLoadWithSize(size, new PigStorageWithStatistics());
Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
- CONF, Lists.newArrayList(createPOLoadWithSize(size, new PigStorage())),
- new org.apache.hadoop.mapreduce.Job(CONF)));
+ CONF, Lists.newArrayList(load1), new org.apache.hadoop.mapreduce.Job(CONF)));
Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
- CONF,
- Lists.newArrayList(createPOLoadWithSize(size, new PigStorageWithStatistics())),
- new org.apache.hadoop.mapreduce.Job(CONF)));
+ CONF, Lists.newArrayList(load2), new org.apache.hadoop.mapreduce.Job(CONF)));
Assert.assertEquals(size * 2, InputSizeReducerEstimator.getTotalInputFileSize(
- CONF,
- Lists.newArrayList(
- createPOLoadWithSize(size, new PigStorage()),
- createPOLoadWithSize(size, new PigStorageWithStatistics())),
- new org.apache.hadoop.mapreduce.Job(CONF)));
+ CONF, Lists.newArrayList(load1, load2), new org.apache.hadoop.mapreduce.Job(CONF)));
// Negative test - PIG-3754
- POLoad poLoad = createPOLoadWithSize(size, new PigStorage());
- poLoad.setLFile(new FileSpec("hbase://users", null));
+ load1.setLFile(new FileSpec("hbase://users", null));
- Assert.assertEquals(-1, InputSizeReducerEstimator.getTotalInputFileSize(
- CONF,
- Collections.singletonList(poLoad),
- new org.apache.hadoop.mapreduce.Job(CONF)));
+ Assert.assertEquals(0, InputSizeReducerEstimator.getTotalInputFileSize(
+ CONF, Collections.singletonList(load1), new org.apache.hadoop.mapreduce.Job(CONF)));
+
+ // Skip non-hdfs input - PIG-4679
+ Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
+ CONF, Lists.newArrayList(load1, load2), new org.apache.hadoop.mapreduce.Job(CONF)));
}
@Test
Modified:
pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java?rev=1712129&r1=1712128&r2=1712129&view=diff
==============================================================================
--- pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java (original)
+++ pig/branches/branch-0.15/test/org/apache/pig/test/TestJobSubmission.java Mon Nov 2 19:52:05 2015
@@ -251,7 +251,7 @@ abstract public class TestJobSubmission
jc=jcc.compile(mrPlan, "Test");
job = jc.getWaitingJobs().get(0);
- Util.assertParallelValues(-1, -1, -1, 1, job.getJobConf());
+ Util.assertParallelValues(-1, -1, 1, 1, job.getJobConf());
util.deleteTable(Bytes.toBytesBinary("test_table"));
// In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()