Author: daijy Date: Sat Mar 20 01:12:17 2010 New Revision: 925513 URL: http://svn.apache.org/viewvc?rev=925513&view=rev Log: PIG-1307: when we spill the DefaultDataBag we are not setting the size-changed flag to be true.
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=925513&r1=925512&r2=925513&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sat Mar 20 01:12:17 2010 @@ -159,6 +159,8 @@ OPTIMIZATIONS BUG FIXES +PIG-1307: when we spill the DefaultDataBag we are not setting the sized changed flag to be true. (breed via daijy) + PIG-1298: Restore file traversal behavior to Pig loaders (rding) PIG-1289: PIG Join fails while doing a filter on joined data (daijy) Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java?rev=925513&r1=925512&r2=925513&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java Sat Mar 20 01:12:17 2010 @@ -61,7 +61,7 @@ public abstract class DefaultAbstractBag // to run through the disk when people ask. 
protected long mSize = 0; - protected boolean mMemSizeChanged = false; + protected int mLastContentsSize = -1; protected long mMemSize = 0; @@ -78,7 +78,6 @@ public abstract class DefaultAbstractBag */ public void add(Tuple t) { synchronized (mContents) { - mMemSizeChanged = true; mSize++; mContents.add(t); } @@ -90,7 +89,6 @@ public abstract class DefaultAbstractBag */ public void addAll(DataBag b) { synchronized (mContents) { - mMemSizeChanged = true; mSize += b.size(); Iterator<Tuple> i = b.iterator(); while (i.hasNext()) mContents.add(i.next()); @@ -103,7 +101,6 @@ public abstract class DefaultAbstractBag */ public void addAll(Collection<Tuple> c) { synchronized (mContents) { - mMemSizeChanged = true; mSize += c.size(); Iterator<Tuple> i = c.iterator(); while (i.hasNext()) mContents.add(i.next()); @@ -114,22 +111,24 @@ public abstract class DefaultAbstractBag * Return the size of memory usage. */ public long getMemorySize() { - if (!mMemSizeChanged) return mMemSize; - - long used = 0; - // I can't afford to talk through all the tuples every time the - // memory manager wants to know if it's time to dump. Just sample - // the first 100 and see what we get. This may not be 100% - // accurate, but it's just an estimate anyway. int j; int numInMem = 0; + long used = 0; + synchronized (mContents) { + if (mLastContentsSize == mContents.size()) return mMemSize; + + // I can't afford to talk through all the tuples every time the + // memory manager wants to know if it's time to dump. Just sample + // the first 100 and see what we get. This may not be 100% + // accurate, but it's just an estimate anyway. numInMem = mContents.size(); // Measure only what's in memory, not what's on disk. 
Iterator<Tuple> i = mContents.iterator(); for (j = 0; i.hasNext() && j < 100; j++) { used += i.next().getMemorySize(); } + mLastContentsSize = numInMem; } if (numInMem > 100) { @@ -150,7 +149,6 @@ public abstract class DefaultAbstractBag } mMemSize = used; - mMemSizeChanged = false; return used; } Modified: hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java?rev=925513&r1=925512&r2=925513&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java Sat Mar 20 01:12:17 2010 @@ -83,7 +83,6 @@ public class InternalCachedBag extends D } if(mContents.size() < cacheLimit) { - mMemSizeChanged = true; mContents.add(t); if(mContents.size() < 100) { Modified: hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java?rev=925513&r1=925512&r2=925513&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java Sat Mar 20 01:12:17 2010 @@ -149,7 +149,6 @@ public class InternalDistinctBag extends } if (mContents.add(t)) { - mMemSizeChanged = true; mSize ++; // check how many tuples memory can hold by getting average @@ -227,7 +226,6 @@ public class InternalDistinctBag extends } } mContents.clear(); - mMemSizeChanged = true; memUsage = 0; // Increment the spill count Modified: hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java?rev=925513&r1=925512&r2=925513&view=diff 
============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java Sat Mar 20 01:12:17 2010 @@ -145,7 +145,6 @@ public class InternalSortedBag extends D spill(); } - mMemSizeChanged = true; mContents.add(t); // check how many tuples memory can hold by getting average @@ -236,7 +235,6 @@ public class InternalSortedBag extends D } } mContents.clear(); - mMemSizeChanged = true; memUsage = 0; // Increment the spill count