Author: gates Date: Thu May 7 19:23:25 2009 New Revision: 772750 URL: http://svn.apache.org/viewvc?rev=772750&view=rev Log: PIG-800: Fix distinct and order in local mode to not go into an infinite loop.
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=772750&r1=772749&r2=772750&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu May 7 19:23:25 2009 @@ -63,6 +63,9 @@ PIG-774: Pig does not handle Chinese characters (in both the parameter subsitution using -param_file or embedded in the Pig script) correctly (daijy) +PIG-800: Fix distinct and order in local mode to not go into an infinite loop +(gates). + Release 0.2.0 INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java?rev=772750&r1=772749&r2=772750&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java Thu May 7 19:23:25 2009 @@ -79,8 +79,14 @@ while (in.returnStatus != POStatus.STATUS_EOP) { if (in.returnStatus == POStatus.STATUS_ERR) { log.error("Error in reading from inputs"); - continue; + return in; + //continue; } else if (in.returnStatus == POStatus.STATUS_NULL) { + // Ignore the null, read the next tuple. It's not clear + // to me that we should ever get this, or if we should, + // how it differs from EOP. But ignoring it here seems + // to work. + in = processInput(); continue; } distinctBag.add((Tuple) in.result); Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java?rev=772750&r1=772749&r2=772750&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java Thu May 7 19:23:25 2009 @@ -258,8 +258,11 @@ while (res.returnStatus != POStatus.STATUS_EOP) { if (res.returnStatus == POStatus.STATUS_ERR) { log.error("Error in reading from the inputs"); - continue; + return res; + //continue; } else if (res.returnStatus == POStatus.STATUS_NULL) { + // ignore the null, read the next tuple. + res = processInput(); continue; } sortedBag.add((Tuple) res.result); Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java?rev=772750&r1=772749&r2=772750&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java Thu May 7 19:23:25 2009 @@ -20,13 +20,19 @@ import junit.framework.Assert; import junit.framework.TestCase; + +import org.apache.pig.EvalFunc; import org.apache.pig.PigServer; +import org.apache.pig.data.BagFactory; +import org.apache.pig.data.DataBag; import org.apache.pig.data.Tuple; import org.apache.pig.test.utils.TestHelper; import org.junit.Test; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.PrintStream; import java.text.DecimalFormat; import java.util.Iterator; @@ -91,6 +97,53 @@ verifyUnion( "c", 30 + 50 ); } + + @Test + public void testPig800Distinct() throws Exception { + // Regression test for Pig-800 + File fp1 = File.createTempFile("test", "txt"); + PrintStream ps = new PrintStream(new FileOutputStream(fp1)); + + ps.println("1\t1}"); + ps.close(); + + pig.registerQuery("A = load '" + Util.generateURI(fp1.toString()) + "'; "); + pig.registerQuery("B = foreach A generate flatten(" + Pig800Udf.class.getName() + "($0));"); + pig.registerQuery("C = distinct B;"); + + Iterator<Tuple> iter = pig.openIterator("C"); + // Before PIG-800 was fixed this went into an infinite loop, so just + // managing to open the iterator is sufficient. + + } + + @Test + public void testPig800Sort() throws Exception { + // Regression test for Pig-800 + File fp1 = File.createTempFile("test", "txt"); + PrintStream ps = new PrintStream(new FileOutputStream(fp1)); + + ps.println("1\t1}"); + ps.close(); + + pig.registerQuery("A = load '" + Util.generateURI(fp1.toString()) + "'; "); + pig.registerQuery("B = foreach A generate flatten(" + Pig800Udf.class.getName() + "($0));"); + pig.registerQuery("C = order B by $0;"); + + Iterator<Tuple> iter = pig.openIterator("C"); + // Before PIG-800 was fixed this went into an infinite loop, so just + // managing to open the iterator is sufficient. + + } + + static public class Pig800Udf extends EvalFunc<DataBag> { + + @Override + public DataBag exec(Tuple input) throws IOException { + DataBag output = BagFactory.getInstance().newDefaultBag(); + return output; + } + } //verifies results public void verifyUnion(String id, int actualCount ) throws Exception {