Author: thejas
Date: Fri Sep 24 00:05:15 2010
New Revision: 1000668

URL: http://svn.apache.org/viewvc?rev=1000668&view=rev
Log:
PIG-1643: join fails for a query with input having 'load using pigstorage without schema' + 'foreach'

Modified: hadoop/pig/branches/branch-0.8/CHANGES.txt hadoop/pig/branches/branch-0.8/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java hadoop/pig/branches/branch-0.8/test/org/apache/pig/test/TestJoin.java Modified: hadoop/pig/branches/branch-0.8/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.8/CHANGES.txt?rev=1000668&r1=1000667&r2=1000668&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.8/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.8/CHANGES.txt Fri Sep 24 00:05:15 2010 @@ -198,6 +198,9 @@ PIG-1309: Map-side Cogroup (ashutoshc) BUG FIXES +PIG-1643: join fails for a query with input having 'load using pigstorage +without schema' + 'foreach' (thejas) + PIG-1636: Scalar fail if the scalar variable is generated by limit (daijy) PIG-1605: Adding soft link to plan to solve input file dependency (daijy) Modified: hadoop/pig/branches/branch-0.8/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.8/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java?rev=1000668&r1=1000667&r2=1000668&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.8/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java (original) +++ hadoop/pig/branches/branch-0.8/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java Fri Sep 24 00:05:15 2010 @@ -149,8 +149,11 @@ public class ProjectExpression extends C if (findReferent().getSchema()!=null) fieldSchema = findReferent().getSchema().getField(0); } - if (fieldSchema!=null) - uidOnlyFieldSchema = fieldSchema.mergeUid(uidOnlyFieldSchema); + if(fieldSchema == null){ + fieldSchema = new LogicalSchema.LogicalFieldSchema(null, null, DataType.BYTEARRAY); + } + uidOnlyFieldSchema = fieldSchema.mergeUid(uidOnlyFieldSchema); + } else { if (schema 
== null) { Modified: hadoop/pig/branches/branch-0.8/test/org/apache/pig/test/TestJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.8/test/org/apache/pig/test/TestJoin.java?rev=1000668&r1=1000667&r2=1000668&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.8/test/org/apache/pig/test/TestJoin.java (original) +++ hadoop/pig/branches/branch-0.8/test/org/apache/pig/test/TestJoin.java Fri Sep 24 00:05:15 2010 @@ -29,6 +29,7 @@ import org.apache.pig.PigException; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.BagFactory; +import org.apache.pig.data.DataByteArray; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.io.FileLocalizer; @@ -260,6 +261,51 @@ public class TestJoin extends TestCase { } @Test + public void testJoinSchema2() throws Exception { + // test join where one load does not have schema + + ExecType execType = ExecType.LOCAL; + setUp(execType ); + String[] input1 = { + "1\t2", + "2\t3", + "3\t4" + }; + String[] input2 = { + "1\thello", + "4\tbye", + }; + + String firstInput = createInputFile(execType, "a.txt", input1); + String secondInput = createInputFile(execType, "b.txt", input2); + Tuple expectedResultCharArray = + (Tuple)Util.getPigConstant("('1','2','1','hello','1','2','1','hello')"); + + Tuple expectedResult = TupleFactory.getInstance().newTuple(); + for(Object field : expectedResultCharArray.getAll()){ + expectedResult.append(new DataByteArray(field.toString())); + } + + // with schema + String script = "a = load '"+ firstInput +"' ; " + + //re-using alias a for new operator below, doing this intentionally + // because such use case has been seen + "a = foreach a generate $0 as i, $1 as j ;" + + "b = load '"+ secondInput +"' as (k, l); " + + "c = join a by $0, b by $0;" + + "d = foreach c generate i,j,k,l,a::i as ai,a::j as aj,b::k as 
bk,b::l as bl;"; + Util.registerMultiLineQuery(pigServer, script); + Iterator<Tuple> it = pigServer.openIterator("d"); + assertEquals(true, it.hasNext()); + Tuple res = it.next(); + assertEquals(expectedResult, res); + assertEquals(false, it.hasNext()); + deleteInputFile(execType, firstInput); + deleteInputFile(execType, secondInput); + + } + + @Test public void testLeftOuterJoin() throws IOException, ParseException { for (ExecType execType : execTypes) { setUp(execType);