Author: pradeepkth
Date: Mon Sep 14 19:36:54 2009
New Revision: 814800
URL: http://svn.apache.org/viewvc?rev=814800&view=rev
Log:
PIG-957: Tutorial is broken with 0.4 branch and trunk (pradeepkth)
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java
hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=814800&r1=814799&r2=814800&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Mon Sep 14 19:36:54 2009
@@ -83,6 +83,8 @@
BUG FIXES
+ PIG-957: Tutorial is broken with 0.4 branch and trunk (pradeepkth)
+
PIG-955: Skewed join produces invalid results (yinghe via olgan)
PIG-954: Skewed join fails when pig.skewedjoin.reduce.memusage is not
Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java?rev=814800&r1=814799&r2=814800&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java Mon Sep 14 19:36:54 2009
@@ -170,13 +170,19 @@
}
}
mIsSchemaComputed = true;
+ mSchema = new Schema(fss);
for (Entry<String, Integer> ent : nonDuplicates.entrySet()) {
int ind = ent.getValue();
if(ind==-1) continue;
FieldSchema prevSch = fss.get(ind);
- prevSch.alias = ent.getKey();
+ // this is a non duplicate and hence can be referred to
+ // with just the field schema alias or outeralias::field schema alias
+ // In mSchema we have outeralias::fieldschemaalias. To allow
+ // using just the field schema alias, add it to mSchemas
+ // as an alias for this field.
+ mSchema.addAlias(ent.getKey(), prevSch);
}
- mSchema = new Schema(fss);
+
}
return mSchema;
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=814800&r1=814799&r2=814800&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Mon Sep 14 19:36:54 2009
@@ -99,6 +99,65 @@
Util.deleteFile(cluster, "b.txt");
}
+
+ @Test
+ public void testJoinSchema() throws Exception {
+ String[] input1 = {
+ "1\t2",
+ "2\t3",
+ "3\t4"
+ };
+ String[] input2 = {
+ "1\thello",
+ "4\tbye",
+ };
+
+ Util.createInputFile(cluster, "a.txt", input1);
+ Util.createInputFile(cluster, "b.txt", input2);
+ Tuple expectedResult = (Tuple)Util.getPigConstant("(1,2,1,'hello',1,2,1,'hello')");
+
+ // with schema
+ String script = "a = load 'a.txt' as (i:int, j:int); " +
+ "b = load 'b.txt' as (k:int, l:chararray); " +
+ "c = join a by $0, b by $0;" +
+ "d = foreach c generate i,j,k,l,a::i,a::j,b::k,b::l;";
+ Util.registerMultiLineQuery(pigServer, script);
+ Iterator<Tuple> it = pigServer.openIterator("d");
+ assertEquals(true, it.hasNext());
+ assertEquals(expectedResult, it.next());
+ assertEquals(false, it.hasNext());
+
+ // schema with duplicates
+ script = "a = load 'a.txt' as (i:int, j:int); " +
+ "b = load 'b.txt' as (i:int, l:chararray); " +
+ "c = join a by $0, b by $0;" +
+ "d = foreach c generate i,j,l,a::i,a::j,b::i,b::l;";
+ boolean exceptionThrown = false;
+ try{
+ Util.registerMultiLineQuery(pigServer, script);
+ }catch (Exception e) {
+ PigException pe = LogUtils.getPigException(e);
+ assertEquals(1025, pe.getErrorCode());
+ exceptionThrown = true;
+ }
+ assertEquals(true, exceptionThrown);
+
+ // schema with duplicates with resolution
+ script = "a = load 'a.txt' as (i:int, j:int); " +
+ "b = load 'b.txt' as (i:int, l:chararray); " +
+ "c = join a by $0, b by $0;" +
+ "d = foreach c generate a::i,j,b::i,l,a::i,a::j,b::i,b::l;";
+ Util.registerMultiLineQuery(pigServer, script);
+ it = pigServer.openIterator("d");
+ assertEquals(true, it.hasNext());
+ assertEquals(expectedResult, it.next());
+ assertEquals(false, it.hasNext());
+ Util.deleteFile(cluster, "a.txt");
+ Util.deleteFile(cluster, "b.txt");
+
+
+ }
+
@Test
public void testLeftOuterJoin() throws IOException, ParseException {
String[] input1 = {
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java?rev=814800&r1=814799&r2=814800&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestPigServer.java Mon Sep 14 19:36:54 2009
@@ -504,7 +504,7 @@
InputStream fileWithStdOutContents = new DataInputStream( new BufferedInputStream( new FileInputStream(stdOutRedirectedFile)));
BufferedReader reader = new BufferedReader(new InputStreamReader(fileWithStdOutContents));
while ((s = reader.readLine()) != null) {
- assertTrue(s.equals("c: {field1: int,field2: float,field3: chararray,field4: bytearray,field5: double,field6: chararray}") == true);
+ assertEquals("c: {a::field1: int,a::field2: float,a::field3: chararray,b::field4: bytearray,b::field5: double,b::field6: chararray}", s );
}
fileWithStdOutContents.close();
}