Author: pradeepkth
Date: Mon Sep 14 19:36:54 2009
New Revision: 814800
URL: http://svn.apache.org/viewvc?rev=814800&view=rev
Log: PIG-957: Tutorial is broken with 0.4 branch and trunk (pradeepkth)
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=814800&r1=814799&r2=814800&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Mon Sep 14 19:36:54 2009 @@ -83,6 +83,8 @@ BUG FIXES + PIG-957: Tutorial is broken with 0.4 branch and trunk (pradeepkth) + PIG-955: Skewed join produces invalid results (yinghe via olgan) PIG-954: Skewed join fails when pig.skewedjoin.reduce.memusage is not Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java?rev=814800&r1=814799&r2=814800&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java Mon Sep 14 19:36:54 2009 @@ -170,13 +170,19 @@ } } mIsSchemaComputed = true; + mSchema = new Schema(fss); for (Entry<String, Integer> ent : nonDuplicates.entrySet()) { int ind = ent.getValue(); if(ind==-1) continue; FieldSchema prevSch = fss.get(ind); - prevSch.alias = ent.getKey(); + // this is a non duplicate and hence can be referred to + // with just the field schema alias or outeralias::field schema alias + // In mSchema we have outeralias::fieldschemaalias. To allow + // using just the field schema alias, add it to mSchemas + // as an alias for this field. 
+ mSchema.addAlias(ent.getKey(), prevSch); } - mSchema = new Schema(fss); + } return mSchema; } Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=814800&r1=814799&r2=814800&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Mon Sep 14 19:36:54 2009 @@ -99,6 +99,65 @@ Util.deleteFile(cluster, "b.txt"); } + + @Test + public void testJoinSchema() throws Exception { + String[] input1 = { + "1\t2", + "2\t3", + "3\t4" + }; + String[] input2 = { + "1\thello", + "4\tbye", + }; + + Util.createInputFile(cluster, "a.txt", input1); + Util.createInputFile(cluster, "b.txt", input2); + Tuple expectedResult = (Tuple)Util.getPigConstant("(1,2,1,'hello',1,2,1,'hello')"); + + // with schema + String script = "a = load 'a.txt' as (i:int, j:int); " + + "b = load 'b.txt' as (k:int, l:chararray); " + + "c = join a by $0, b by $0;" + + "d = foreach c generate i,j,k,l,a::i,a::j,b::k,b::l;"; + Util.registerMultiLineQuery(pigServer, script); + Iterator<Tuple> it = pigServer.openIterator("d"); + assertEquals(true, it.hasNext()); + assertEquals(expectedResult, it.next()); + assertEquals(false, it.hasNext()); + + // schema with duplicates + script = "a = load 'a.txt' as (i:int, j:int); " + + "b = load 'b.txt' as (i:int, l:chararray); " + + "c = join a by $0, b by $0;" + + "d = foreach c generate i,j,l,a::i,a::j,b::i,b::l;"; + boolean exceptionThrown = false; + try{ + Util.registerMultiLineQuery(pigServer, script); + }catch (Exception e) { + PigException pe = LogUtils.getPigException(e); + assertEquals(1025, pe.getErrorCode()); + exceptionThrown = true; + } + assertEquals(true, exceptionThrown); + + // schema with duplicates with resolution + script = "a = load 'a.txt' as (i:int, j:int); " + + "b = load 'b.txt' as (i:int, 
l:chararray); " + + "c = join a by $0, b by $0;" + + "d = foreach c generate a::i,j,b::i,l,a::i,a::j,b::i,b::l;"; + Util.registerMultiLineQuery(pigServer, script); + it = pigServer.openIterator("d"); + assertEquals(true, it.hasNext()); + assertEquals(expectedResult, it.next()); + assertEquals(false, it.hasNext()); + Util.deleteFile(cluster, "a.txt"); + Util.deleteFile(cluster, "b.txt"); + + + } + @Test public void testLeftOuterJoin() throws IOException, ParseException { String[] input1 = { Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java?rev=814800&r1=814799&r2=814800&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java Mon Sep 14 19:36:54 2009 @@ -504,7 +504,7 @@ InputStream fileWithStdOutContents = new DataInputStream( new BufferedInputStream( new FileInputStream(stdOutRedirectedFile))); BufferedReader reader = new BufferedReader(new InputStreamReader(fileWithStdOutContents)); while ((s = reader.readLine()) != null) { - assertTrue(s.equals("c: {field1: int,field2: float,field3: chararray,field4: bytearray,field5: double,field6: chararray}") == true); + assertEquals("c: {a::field1: int,a::field2: float,a::field3: chararray,b::field4: bytearray,b::field5: double,b::field6: chararray}", s ); } fileWithStdOutContents.close(); }