Author: thejas
Date: Fri Sep 24 00:04:26 2010
New Revision: 1000667

URL: http://svn.apache.org/viewvc?rev=1000667&view=rev
Log:
PIG-1643: join fails for a query with input having 'load using pigstorage
without schema' + 'foreach' 

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=1000667&r1=1000666&r2=1000667&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri Sep 24 00:04:26 2010
@@ -207,6 +207,9 @@ PIG-1309: Map-side Cogroup (ashutoshc)
 
 BUG FIXES
 
+PIG-1643: join fails for a query with input having 'load using pigstorage 
+without schema' + 'foreach' (thejas)
+
 PIG-1628: log this message at debug level : 'Pig Internal storage in use' (thejas)
 
 PIG-1636: Scalar fail if the scalar variable is generated by limit (daijy)

Modified: hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java?rev=1000667&r1=1000666&r2=1000667&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java Fri Sep 24 00:04:26 2010
@@ -149,8 +149,11 @@ public class ProjectExpression extends C
                 if (findReferent().getSchema()!=null)
                     fieldSchema = findReferent().getSchema().getField(0);
             }
-            if (fieldSchema!=null)
-                uidOnlyFieldSchema = fieldSchema.mergeUid(uidOnlyFieldSchema);
+            if(fieldSchema == null){
+                fieldSchema = new LogicalSchema.LogicalFieldSchema(null, null, DataType.BYTEARRAY);
+            }
+            uidOnlyFieldSchema = fieldSchema.mergeUid(uidOnlyFieldSchema);
+            
         }
         else {
             if (schema == null) {

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=1000667&r1=1000666&r2=1000667&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Fri Sep 24 00:04:26 2010
@@ -29,6 +29,7 @@ import org.apache.pig.PigException;
 import org.apache.pig.PigServer;
 import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.data.BagFactory;
+import org.apache.pig.data.DataByteArray;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.io.FileLocalizer;
@@ -260,6 +261,51 @@ public class TestJoin extends TestCase {
     }
     
     @Test
+    public void testJoinSchema2() throws Exception {
+        // test join where one load does not have schema
+        
+        ExecType execType = ExecType.LOCAL;
+        setUp(execType );
+        String[] input1 = {
+                "1\t2",
+                "2\t3",
+                "3\t4"
+        };
+        String[] input2 = {
+                "1\thello",
+                "4\tbye",
+        };
+        
+        String firstInput = createInputFile(execType, "a.txt", input1);
+        String secondInput = createInputFile(execType, "b.txt", input2);
+        Tuple expectedResultCharArray =
+            (Tuple)Util.getPigConstant("('1','2','1','hello','1','2','1','hello')");
+        
+        Tuple expectedResult = TupleFactory.getInstance().newTuple();
+        for(Object field : expectedResultCharArray.getAll()){
+            expectedResult.append(new DataByteArray(field.toString()));
+        }
+        
+        // with schema
+        String script = "a = load '"+ firstInput +"' ; " +
+        //re-using alias a for new operator below, doing this intentionally 
+        // because such use case has been seen
+        "a = foreach a generate $0 as i, $1 as j ;" +
+        "b = load '"+ secondInput +"' as (k, l); " +
+        "c = join a by $0, b by $0;" +
+        "d = foreach c generate i,j,k,l,a::i as ai,a::j as aj,b::k as bk,b::l as bl;";
+        Util.registerMultiLineQuery(pigServer, script);
+        Iterator<Tuple> it = pigServer.openIterator("d");
+        assertEquals(true, it.hasNext());
+        Tuple res = it.next();
+        assertEquals(expectedResult, res);
+        assertEquals(false, it.hasNext());
+        deleteInputFile(execType, firstInput);
+        deleteInputFile(execType, secondInput);
+        
+    }
+    
+    @Test
     public void testLeftOuterJoin() throws IOException, ParseException {
         for (ExecType execType : execTypes) {
             setUp(execType);


Reply via email to