Author: pradeepkth
Date: Fri Apr 24 16:53:33 2009
New Revision: 768366

URL: http://svn.apache.org/viewvc?rev=768366&view=rev
Log:
PIG-775: PORelationToExprProject should create a NonSpillableDataBag to create empty bags (pradeepkth)

Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=768366&r1=768365&r2=768366&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Apr 24 16:53:33 2009 @@ -31,6 +31,9 @@ PIG-712: Added utility functions to create schemas for tuples and bags (zjffdu via gates). +PIG-775: PORelationToExprProject should create a NonSpillableDataBag to create +empty bags (pradeepkth) + BUG FIXES PIG-733: Order by sampling dumps entire sample to hdfs which causes dfs Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java?rev=768366&r1=768365&r2=768366&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java Fri Apr 24 16:53:33 2009 @@ -25,6 +25,7 @@ import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataType; +import org.apache.pig.data.NonSpillableDataBag; import org.apache.pig.impl.plan.NodeIdGenerator; import 
org.apache.pig.impl.plan.OperatorKey; import org.apache.pig.impl.plan.VisitorException; @@ -109,7 +110,7 @@ // we received an EOP from the predecessor // since the successor in the pipeline is // expecting a bag, send an empty bag - input.result = bagFactory.newDefaultBag(); + input.result = new NonSpillableDataBag(); input.returnStatus = POStatus.STATUS_OK; // we should send EOP the next time we are called // if the foreach in which this operator is present Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java?rev=768366&r1=768365&r2=768366&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java Fri Apr 24 16:53:33 2009 @@ -188,7 +188,7 @@ while (i.hasNext()) thisClone.add(i.next()); } if (other instanceof SortedDataBag || - this instanceof DistinctDataBag) { + other instanceof DistinctDataBag) { otherClone = bOther; } else { otherClone = new SortedDataBag(null); Modified: hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java?rev=768366&r1=768365&r2=768366&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java Fri Apr 24 16:53:33 2009 @@ -203,8 +203,26 @@ else return -1; } - Iterator<Tuple> thisIt = this.iterator(); - Iterator<Tuple> otherIt = bOther.iterator(); + // Ugh, this is bogus. But I have to know if two bags have the + // same tuples, regardless of order. Hopefully most of the + // time the size check above will prevent this. 
+ // If either bag isn't already sorted, create a sorted bag out + // of it so I can guarantee order. + DataBag thisClone; + DataBag otherClone; + thisClone = new SortedDataBag(null); + Iterator<Tuple> i = iterator(); + while (i.hasNext()) thisClone.add(i.next()); + if (other instanceof SortedDataBag || + other instanceof DistinctDataBag) { + otherClone = bOther; + } else { + otherClone = new SortedDataBag(null); + i = bOther.iterator(); + while (i.hasNext()) otherClone.add(i.next()); + } + Iterator<Tuple> thisIt = thisClone.iterator(); + Iterator<Tuple> otherIt = otherClone.iterator(); while (thisIt.hasNext() && otherIt.hasNext()) { Tuple thisT = thisIt.next(); Tuple otherT = otherIt.next(); Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java?rev=768366&r1=768365&r2=768366&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java Fri Apr 24 16:53:33 2009 @@ -729,6 +729,68 @@ BagFactory.resetSelf(); } + + @Test + public void testNonSpillableDataBagEquals1() throws Exception { + String[][] tupleContents = new String[][] {{"a", "b"},{"c", "d" }, { "e", "f"} }; + NonSpillableDataBag bg1 = new NonSpillableDataBag(); + for (int i = 0; i < tupleContents.length; i++) { + bg1.add(Util.createTuple(tupleContents[i])); + } + NonSpillableDataBag bg2 = new NonSpillableDataBag(); + for (int i = 0; i < tupleContents.length; i++) { + bg2.add(Util.createTuple(tupleContents[i])); + } + assertEquals(bg1, bg2); + } + + @Test + public void testNonSpillableDataBagEquals2() throws Exception { + String[][] tupleContents = new String[][] {{"a", "b"},{"c", "d" }, { "e", "f"} }; + NonSpillableDataBag bg1 = new NonSpillableDataBag(); + for (int i = 0; i < tupleContents.length; i++) { + 
bg1.add(Util.createTuple(tupleContents[i])); + } + tupleContents = new String[][] {{"c", "d" }, {"a", "b"},{ "e", "f"} }; + NonSpillableDataBag bg2 = new NonSpillableDataBag(); + for (int i = 0; i < tupleContents.length; i++) { + bg2.add(Util.createTuple(tupleContents[i])); + } + assertEquals(bg1, bg2); + } + + @Test + public void testDefaultDataBagEquals1() throws Exception { + String[][] tupleContents = new String[][] {{"a", "b"},{"c", "d" }, { "e", "f"} }; + TestMemoryManager mgr = new TestMemoryManager(); + LocalBagFactory factory = new LocalBagFactory(mgr); + DataBag bg1 = factory.newDefaultBag(); + for (int i = 0; i < tupleContents.length; i++) { + bg1.add(Util.createTuple(tupleContents[i])); + } + DataBag bg2 = factory.newDefaultBag(); + for (int i = 0; i < tupleContents.length; i++) { + bg2.add(Util.createTuple(tupleContents[i])); + } + assertEquals(bg1, bg2); + } + + @Test + public void testDefaultDataBagEquals2() throws Exception { + String[][] tupleContents = new String[][] {{"a", "b"},{"c", "d" }, { "e", "f"} }; + TestMemoryManager mgr = new TestMemoryManager(); + LocalBagFactory factory = new LocalBagFactory(mgr); + DataBag bg1 = factory.newDefaultBag(); + for (int i = 0; i < tupleContents.length; i++) { + bg1.add(Util.createTuple(tupleContents[i])); + } + tupleContents = new String[][] {{"c", "d" }, {"a", "b"},{ "e", "f"} }; + DataBag bg2 = factory.newDefaultBag(); + for (int i = 0; i < tupleContents.length; i++) { + bg2.add(Util.createTuple(tupleContents[i])); + } + assertEquals(bg1, bg2); + } }