Author: hashutosh
Date: Wed Feb 10 02:30:45 2010
New Revision: 908324
URL: http://svn.apache.org/viewvc?rev=908324&view=rev
Log:
PIG-1230: Streaming input in POJoinPackage should use nonspillable bag to
collect tuples
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java
hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=908324&r1=908323&r2=908324&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Wed Feb 10 02:30:45 2010
@@ -24,6 +24,9 @@
IMPROVEMENTS
+PIG-1230: Streaming input in POJoinPackage should use nonspillable bag to
+collect tuples (ashutoshc)
+
PIG-1224: Collected group should change to use new (internal) bag (ashutoshc)
PIG-1046: join algorithm specification is within double quotes (ashutoshc)
Modified:
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java?rev=908324&r1=908323&r2=908324&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java Wed Feb 10 02:30:45 2010
@@ -29,6 +29,7 @@
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.InternalCachedBag;
+import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.NullableTuple;
import org.apache.pig.impl.plan.NodeIdGenerator;
@@ -140,14 +141,16 @@
lastInputTuple = false;
//Put n-1 inputs into bags
dbs = new DataBag[numInputs];
- for (int i = 0; i < numInputs; i++) {
+ for (int i = 0; i < numInputs - 1; i++) {
dbs[i] = useDefaultBag ?
BagFactory.getInstance().newDefaultBag()
// In a very rare case if there is a POStream after this
// POJoinPackage in the pipeline and is also blocking the pipeline;
// constructor argument should be 2 * numInputs. But for one obscure
// case we don't want to pay the penalty all the time.
- : new InternalCachedBag(numInputs);
+ : new InternalCachedBag(numInputs-1);
}
+ // For last bag, we always use NonSpillableBag.
+ dbs[lastBagIndex] = new NonSpillableDataBag((int)chunkSize);
//For each Nullable tuple in the input, put it
//into the corresponding bag based on the index,
Modified: hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java?rev=908324&r1=908323&r2=908324&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java Wed Feb 10 02:30:45 2010
@@ -55,6 +55,15 @@
}
/**
+ * Use this constructor if you know upfront how many tuples you are going
+ * to put in this bag.
+ * @param tupleCount
+ */
+ public NonSpillableDataBag(int tupleCount){
+ mContents = new ArrayList<Tuple>(tupleCount);
+ }
+
+ /**
* This constructor creates a bag out of an existing list
* of tuples by taking ownership of the list and NOT
* copying the contents of the list.