Author: pradeepkth
Date: Tue May 12 01:44:52 2009
New Revision: 773755
URL: http://svn.apache.org/viewvc?rev=773755&view=rev
Log:
PIG-775: PORelationToExprProject should create a NonSpillableDataBag to create
empty bags (pradeepkth)
Modified:
hadoop/pig/branches/pre-multiquery-phase2/CHANGES.txt
hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java
hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/DefaultAbstractBag.java
hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/NonSpillableDataBag.java
hadoop/pig/branches/pre-multiquery-phase2/test/org/apache/pig/test/TestDataBag.java
Modified: hadoop/pig/branches/pre-multiquery-phase2/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/pre-multiquery-phase2/CHANGES.txt?rev=773755&r1=773754&r2=773755&view=diff
==============================================================================
--- hadoop/pig/branches/pre-multiquery-phase2/CHANGES.txt (original)
+++ hadoop/pig/branches/pre-multiquery-phase2/CHANGES.txt Tue May 12 01:44:52 2009
@@ -28,6 +28,8 @@
PIG-700: To automate the pig patch test process (gkesavan via sms)
+PIG-775: PORelationToExprProject should create a NonSpillableDataBag to create empty bags (pradeepkth)
+
BUG FIXES
PIG-733: Order by sampling dumps entire sample to hdfs which causes dfs
Modified:
hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java?rev=773755&r1=773754&r2=773755&view=diff
==============================================================================
--- hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java (original)
+++ hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java Tue May 12 01:44:52 2009
@@ -25,6 +25,7 @@
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;
@@ -109,7 +110,7 @@
// we received an EOP from the predecessor
// since the successor in the pipeline is
// expecting a bag, send an empty bag
- input.result = bagFactory.newDefaultBag();
+ input.result = new NonSpillableDataBag();
input.returnStatus = POStatus.STATUS_OK;
// we should send EOP the next time we are called
// if the foreach in which this operator is present
Modified:
hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/DefaultAbstractBag.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/DefaultAbstractBag.java?rev=773755&r1=773754&r2=773755&view=diff
==============================================================================
--- hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/DefaultAbstractBag.java (original)
+++ hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/DefaultAbstractBag.java Tue May 12 01:44:52 2009
@@ -188,7 +188,7 @@
while (i.hasNext()) thisClone.add(i.next());
}
if (other instanceof SortedDataBag ||
- this instanceof DistinctDataBag) {
+ other instanceof DistinctDataBag) {
otherClone = bOther;
} else {
otherClone = new SortedDataBag(null);
Modified:
hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/NonSpillableDataBag.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/NonSpillableDataBag.java?rev=773755&r1=773754&r2=773755&view=diff
==============================================================================
--- hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/NonSpillableDataBag.java (original)
+++ hadoop/pig/branches/pre-multiquery-phase2/src/org/apache/pig/data/NonSpillableDataBag.java Tue May 12 01:44:52 2009
@@ -203,8 +203,26 @@
else return -1;
}
- Iterator<Tuple> thisIt = this.iterator();
- Iterator<Tuple> otherIt = bOther.iterator();
+ // Ugh, this is bogus. But I have to know if two bags have the
+ // same tuples, regardless of order. Hopefully most of the
+ // time the size check above will prevent this.
+ // If either bag isn't already sorted, create a sorted bag out
+ // of it so I can guarantee order.
+ DataBag thisClone;
+ DataBag otherClone;
+ thisClone = new SortedDataBag(null);
+ Iterator<Tuple> i = iterator();
+ while (i.hasNext()) thisClone.add(i.next());
+ if (other instanceof SortedDataBag ||
+ other instanceof DistinctDataBag) {
+ otherClone = bOther;
+ } else {
+ otherClone = new SortedDataBag(null);
+ i = bOther.iterator();
+ while (i.hasNext()) otherClone.add(i.next());
+ }
+ Iterator<Tuple> thisIt = thisClone.iterator();
+ Iterator<Tuple> otherIt = otherClone.iterator();
while (thisIt.hasNext() && otherIt.hasNext()) {
Tuple thisT = thisIt.next();
Tuple otherT = otherIt.next();
Modified:
hadoop/pig/branches/pre-multiquery-phase2/test/org/apache/pig/test/TestDataBag.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/pre-multiquery-phase2/test/org/apache/pig/test/TestDataBag.java?rev=773755&r1=773754&r2=773755&view=diff
==============================================================================
--- hadoop/pig/branches/pre-multiquery-phase2/test/org/apache/pig/test/TestDataBag.java (original)
+++ hadoop/pig/branches/pre-multiquery-phase2/test/org/apache/pig/test/TestDataBag.java Tue May 12 01:44:52 2009
@@ -729,6 +729,68 @@
BagFactory.resetSelf();
}
+
+ @Test
+ public void testNonSpillableDataBagEquals1() throws Exception {
+ String[][] tupleContents = new String[][] {{"a", "b"},{"c", "d" }, { "e", "f"} };
+ NonSpillableDataBag bg1 = new NonSpillableDataBag();
+ for (int i = 0; i < tupleContents.length; i++) {
+ bg1.add(Util.createTuple(tupleContents[i]));
+ }
+ NonSpillableDataBag bg2 = new NonSpillableDataBag();
+ for (int i = 0; i < tupleContents.length; i++) {
+ bg2.add(Util.createTuple(tupleContents[i]));
+ }
+ assertEquals(bg1, bg2);
+ }
+
+ @Test
+ public void testNonSpillableDataBagEquals2() throws Exception {
+ String[][] tupleContents = new String[][] {{"a", "b"},{"c", "d" }, { "e", "f"} };
+ NonSpillableDataBag bg1 = new NonSpillableDataBag();
+ for (int i = 0; i < tupleContents.length; i++) {
+ bg1.add(Util.createTuple(tupleContents[i]));
+ }
+ tupleContents = new String[][] {{"c", "d" }, {"a", "b"},{ "e", "f"} };
+ NonSpillableDataBag bg2 = new NonSpillableDataBag();
+ for (int i = 0; i < tupleContents.length; i++) {
+ bg2.add(Util.createTuple(tupleContents[i]));
+ }
+ assertEquals(bg1, bg2);
+ }
+
+ @Test
+ public void testDefaultDataBagEquals1() throws Exception {
+ String[][] tupleContents = new String[][] {{"a", "b"},{"c", "d" }, { "e", "f"} };
+ TestMemoryManager mgr = new TestMemoryManager();
+ LocalBagFactory factory = new LocalBagFactory(mgr);
+ DataBag bg1 = factory.newDefaultBag();
+ for (int i = 0; i < tupleContents.length; i++) {
+ bg1.add(Util.createTuple(tupleContents[i]));
+ }
+ DataBag bg2 = factory.newDefaultBag();
+ for (int i = 0; i < tupleContents.length; i++) {
+ bg2.add(Util.createTuple(tupleContents[i]));
+ }
+ assertEquals(bg1, bg2);
+ }
+
+ @Test
+ public void testDefaultDataBagEquals2() throws Exception {
+ String[][] tupleContents = new String[][] {{"a", "b"},{"c", "d" }, { "e", "f"} };
+ TestMemoryManager mgr = new TestMemoryManager();
+ LocalBagFactory factory = new LocalBagFactory(mgr);
+ DataBag bg1 = factory.newDefaultBag();
+ for (int i = 0; i < tupleContents.length; i++) {
+ bg1.add(Util.createTuple(tupleContents[i]));
+ }
+ tupleContents = new String[][] {{"c", "d" }, {"a", "b"},{ "e", "f"} };
+ DataBag bg2 = factory.newDefaultBag();
+ for (int i = 0; i < tupleContents.length; i++) {
+ bg2.add(Util.createTuple(tupleContents[i]));
+ }
+ assertEquals(bg1, bg2);
+ }
}