Author: olga
Date: Tue Dec 22 18:22:20 2009
New Revision: 893259
URL: http://svn.apache.org/viewvc?rev=893259&view=rev
Log:
PIG-1143: Poisson Sample Loader should compute the number of samples required
only once (sriranjan via olgan)
Modified:
hadoop/pig/branches/branch-0.6/CHANGES.txt
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java
hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java
hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPoissonSampleLoader.java
hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestSkewedJoin.java
hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/Util.java
Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=893259&r1=893258&r2=893259&view=diff
==
--- hadoop/pig/branches/branch-0.6/CHANGES.txt (original)
+++ hadoop/pig/branches/branch-0.6/CHANGES.txt Tue Dec 22 18:22:20 2009
@@ -141,6 +141,9 @@
BUG FIXES
+PIG-1143: Poisson Sample Loader should compute the number of samples required
+only once (sriranjan via olgan)
+
PIG-1135: skewed join partitioner returns negative partition index (yinghe
via olgan)
PIG-1134: Skewed Join sampling job overwhelms the name node (sriranjan via
olgan)
Modified:
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=893259&r1=893258&r2=893259&view=diff
==
---
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
(original)
+++
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
Tue Dec 22 18:22:20 2009
@@ -1401,6 +1401,7 @@
}
FileSpec fSpec = getTempFileSpec();
+ System.err.println("#...@#@ fSpec: " +
fSpec.getFileName());
MapReduceOper mro = compiledInputs[0];
POStore str = getStore();
str.setSFile(fSpec);
@@ -1845,7 +1846,7 @@
String inputFile = lFile.getFileName();
return getSamplingJob(sort, prevJob, transformPlans, lFile,
sampleFile, rp, null,
-
PartitionSkewedKeys.class.getName(), new String[]{per, mc, inputFile},
RandomSampleLoader.class.getName());
+
PartitionSkewedKeys.class.getName(), new String[]{per, mc, inputFile},
PoissonSampleLoader.class.getName());
}catch(Exception e) {
throw new PlanException(e);
}
Modified:
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java?rev=893259&r1=893258&r2=893259&view=diff
==
---
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
(original)
+++
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
Tue Dec 22 18:22:20 2009
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -50,11 +51,14 @@
import
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.SliceWrapper;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.builtin.SampleLoader;
+import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.io.ValidatingInputFileSpec;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.impl.util.Pair;
+import org.apache.pig.impl.util.UDFContext;
public class PigInputFormat implements InputFormat<Text, Tuple>,
JobConfigurable {
@@ -239,6 +243,20 @@
if ((spec.getSlicer() instanceof PigSlicer)) {