Author: yanz
Date: Sat Sep 25 03:55:10 2010
New Revision: 1001133

URL: http://svn.apache.org/viewvc?rev=1001133&view=rev
Log:
PIG-1645: Using both small split combination and temporary file compression on a query of ORDER BY may cause crash (yanz)

Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java hadoop/pig/trunk/src/org/apache/pig/impl/builtin/RandomSampleLoader.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=1001133&r1=1001132&r2=1001133&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sat Sep 25 03:55:10 2010 @@ -207,6 +207,8 @@ PIG-1309: Map-side Cogroup (ashutoshc) BUG FIXES +PIG-1645: Using both small split combination and temporary file compression on a query of ORDER BY may cause crash (yanz) + PIG-1635: Logical simplifier does not simplify away constants under AND and OR; after simplificaion the ordering of operands of AND and OR may get changed (yanz) Modified: hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java?rev=1001133&r1=1001132&r2=1001133&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java Sat Sep 25 03:55:10 2010 @@ -22,7 +22,9 @@ import java.util.ArrayList; import java.util.Properties; +import org.apache.hadoop.mapreduce.RecordReader; import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.PigContext; @@ -198,5 +200,19 @@ public class PoissonSampleLoader extends } } + + @Override + public void prepareToRead(RecordReader reader, PigSplit split) throws IOException { + super.prepareToRead(reader, split); + numRowsSampled = 0; + avgTupleMemSz = 
0; + rowNum = 0; + skipInterval = -1; + memToSkipPerSample = 0; + numRowSplTupleReturned = false; + sampleRate = DEFAULT_SAMPLE_RATE; + heapPerc = PartitionSkewedKeys.DEFAULT_PERCENT_MEMUSAGE; + newSample = null; + } } Modified: hadoop/pig/trunk/src/org/apache/pig/impl/builtin/RandomSampleLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/builtin/RandomSampleLoader.java?rev=1001133&r1=1001132&r2=1001133&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/impl/builtin/RandomSampleLoader.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/builtin/RandomSampleLoader.java Sat Sep 25 03:55:10 2010 @@ -21,6 +21,8 @@ import java.io.IOException; import java.util.Random; import org.apache.pig.data.Tuple; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit; /** * A loader that samples the data. @@ -101,6 +103,13 @@ public class RandomSampleLoader extends return getSample(); } + @Override + public void prepareToRead(RecordReader reader, PigSplit split) throws IOException { + super.prepareToRead(reader, split); + samples = null; + nextSampleIdx = 0; + } + private Tuple getSample() { if(nextSampleIdx < samples.length){ return samples[nextSampleIdx++];