Author: daijy
Date: Tue Feb  9 19:01:26 2010
New Revision: 908158

URL: http://svn.apache.org/viewvc?rev=908158&view=rev
Log:
PIG-1231: DefaultDataBagIterator.hasNext() should be idempotent in all cases

Modified:
    hadoop/pig/branches/branch-0.6/CHANGES.txt
    hadoop/pig/branches/branch-0.6/src/org/apache/pig/data/DefaultDataBag.java
    hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestDataBag.java
    hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestNullConstant.java

Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=908158&r1=908157&r2=908158&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/CHANGES.txt (original)
+++ hadoop/pig/branches/branch-0.6/CHANGES.txt Tue Feb  9 19:01:26 2010
@@ -287,6 +287,9 @@
 
 PIG-1210: fieldsToRead send the same fields more than once in some cases (daijy)
 
+PIG-1231: DefaultDataBagIterator.hasNext() should be idempotent in all cases
+(daijy)
+
 Release 0.5.0
 
 INCOMPATIBLE CHANGES

Modified: 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/data/DefaultDataBag.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/data/DefaultDataBag.java?rev=908158&r1=908157&r2=908158&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/src/org/apache/pig/data/DefaultDataBag.java (original)
+++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/data/DefaultDataBag.java Tue Feb  9 19:01:26 2010
@@ -49,6 +49,8 @@
 
     private static final Log log = LogFactory.getLog(DefaultDataBag.class);
     
+    boolean hasCachedTuple = false;
+    
     public DefaultDataBag() {
         mContents = new ArrayList<Tuple>();
     }
@@ -73,6 +75,7 @@
     }
     
     public Iterator<Tuple> iterator() {
+        hasCachedTuple = false;
         return new DefaultDataBagIterator();
     }
 
@@ -147,9 +150,12 @@
         }
 
         public boolean hasNext() { 
-            // See if we can find a tuple.  If so, buffer it.
+            // Once we call hasNext(), set the flag, so we can call hasNext() repeated without fetching next tuple
+            if (hasCachedTuple)
+                return (mBuf != null);
             mBuf = next();
-            return mBuf != null;
+            hasCachedTuple = true;
+            return (mBuf != null);
         }
 
         public Tuple next() {
@@ -158,9 +164,9 @@
             if ((mCntr++ & 0x3ff) == 0) reportProgress();
 
             // If there's one in the buffer, use that one.
-            if (mBuf != null) {
+            if (hasCachedTuple) {
                 Tuple t = mBuf;
-                mBuf = null;
+                hasCachedTuple = false;
                 return t;
             }
 

Modified: 
hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestDataBag.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestDataBag.java?rev=908158&r1=908157&r2=908158&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestDataBag.java (original)
+++ hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestDataBag.java Tue Feb  9 19:01:26 2010
@@ -1082,6 +1082,40 @@
         }
         assertEquals(bg6, bg7);
     }
+    
+    // See PIG-1231
+    @Test
+    public void testDataBagIterIdempotent() throws Exception {
+        DataBag bg0 = new DefaultDataBag();
+        processDataBag(bg0, true);
+        
+        DataBag bg1 = new DistinctDataBag();
+        processDataBag(bg1, true);
+        
+        DataBag bg2 = new InternalDistinctBag();
+        processDataBag(bg2, true);
+        
+        DataBag bg3 = new InternalSortedBag();
+        processDataBag(bg3, true);
+        
+        DataBag bg4 = new SortedDataBag(null);
+        processDataBag(bg4, true);
+        
+        DataBag bg5 = new InternalCachedBag(0, 0);
+        processDataBag(bg5, false);
+    }
+    
+    void processDataBag(DataBag bg, boolean doSpill) {
+        Tuple t = TupleFactory.getInstance().newTuple(new Integer(0));
+        bg.add(t);
+        if (doSpill)
+            bg.spill();
+        Iterator<Tuple> iter = bg.iterator();
+        assertTrue(iter.hasNext());
+        iter.next();
+        assertFalse(iter.hasNext());
+        assertFalse("hasNext should be idempotent", iter.hasNext());        
+    }
 }
 
 

Modified: 
hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestNullConstant.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestNullConstant.java?rev=908158&r1=908157&r2=908158&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestNullConstant.java (original)
+++ hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestNullConstant.java Tue Feb  9 19:01:26 2010
@@ -145,10 +145,8 @@
         pigServer.registerQuery("b = foreach a generate {(null)}, ['2'#null];");
         Iterator<Tuple> it = pigServer.openIterator("b");
         Tuple t = it.next();
-System.out.println("tuple: " + t);
         assertEquals(null, ((DataBag)t.get(0)).iterator().next().get(0));
         assertEquals(null, ((Map<String, Object>)t.get(1)).get("2"));
-        
     }
 
     @Test


Reply via email to