Author: daijy
Date: Fri Oct 2 17:09:48 2009
New Revision: 821101
URL: http://svn.apache.org/viewvc?rev=821101&view=rev
Log:
PIG-592: schema inferred incorrectly
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java
hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java
hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java
hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri Oct 2 17:09:48 2009
@@ -32,6 +32,8 @@
PIG-948: [Usability] Relating pig script with MR jobs (ashutoshc via daijy)
+PIG-960: Using Hadoop's optimized LineRecordReader for reading Tuples in PigStorage ( ankit.modi via daijy)
+
OPTIMIZATIONS
BUG FIXES
@@ -46,6 +48,8 @@
PIG-951: Set parallelism explicitly to 1 for indexing job in merge join
(ashutoc via gates).
+PIG-592: schema inferred incorrectly (daijy)
+
Release 0.5.0 - Unreleased
INCOMPATIBLE CHANGES
@@ -69,8 +73,6 @@
IMPROVEMENTS
-PIG-960: Using Hadoop's optimized LineRecordReader for reading Tuples in PigStorage ( ankit.modi via daijy)
-
PIG-938: documentation changes for Pig 0.4.0 release (chandec via olgan)
PIG-578: join ... outer, ... outer semantics are a no-ops, should produce
Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java Fri Oct 2 17:09:48 2009
@@ -134,6 +134,7 @@
if(!mIsSchemaComputed){
List<Schema.FieldSchema> fss = new ArrayList<Schema.FieldSchema>();
int i=-1;
+ boolean seeUnknown = false;
for (LogicalOperator op : inputs) {
try {
Schema cSchema = op.getSchema();
@@ -159,9 +160,7 @@
fss.add(newFS);
}
} else {
- FieldSchema newFS = new FieldSchema(null, DataType.BYTEARRAY);
- newFS.setParent(null, op);
- fss.add(newFS);
+ seeUnknown = true;
}
} catch (FrontendException ioe) {
mIsSchemaComputed = false;
@@ -170,19 +169,22 @@
}
}
mIsSchemaComputed = true;
- mSchema = new Schema(fss);
- for (Entry<String, Integer> ent : nonDuplicates.entrySet()) {
- int ind = ent.getValue();
- if(ind==-1) continue;
- FieldSchema prevSch = fss.get(ind);
- // this is a non duplicate and hence can be referred to
- // with just the field schema alias or outeralias::field schema alias
- // In mSchema we have outeralias::fieldschemaalias. To allow
- // using just the field schema alias, add it to mSchemas
- // as an alias for this field.
- mSchema.addAlias(ent.getKey(), prevSch);
+ mSchema = null;
+ if (!seeUnknown)
+ {
+ mSchema = new Schema(fss);
+ for (Entry<String, Integer> ent : nonDuplicates.entrySet()) {
+ int ind = ent.getValue();
+ if(ind==-1) continue;
+ FieldSchema prevSch = fss.get(ind);
+ // this is a non duplicate and hence can be referred to
+ // with just the field schema alias or outeralias::field schema alias
+ // In mSchema we have outeralias::fieldschemaalias. To allow
+ // using just the field schema alias, add it to mSchemas
+ // as an alias for this field.
+ mSchema.addAlias(ent.getKey(), prevSch);
+ }
}
-
}
return mSchema;
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java Fri Oct 2 17:09:48 2009
@@ -378,7 +378,7 @@
frjSch = pigServer.dumpSchema("C");
pigServer.registerQuery("C = join A by $0, B by $0;");
shjSch = pigServer.dumpSchema("C");
- Assert.assertEquals(true, shjSch.equals(frjSch));
+ Assert.assertTrue(shjSch == null);
}
@Test
@@ -404,7 +404,7 @@
frjSch = pigServer.dumpSchema("D");
pigServer.registerQuery("D = join A by $0, B by $0, C by $0;");
shjSch = pigServer.dumpSchema("D");
- Assert.assertEquals(true, shjSch.equals(frjSch));
+ Assert.assertTrue(shjSch == null);
}
@Test
@@ -428,6 +428,6 @@
frjSch = pigServer.dumpSchema("C");
pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
shjSch = pigServer.dumpSchema("C");
- Assert.assertEquals(true, shjSch.equals(frjSch));
+ Assert.assertTrue(shjSch == null);
}
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Fri Oct 2 17:09:48 2009
@@ -33,6 +33,7 @@
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.logicalLayer.parser.ParseException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.LogUtils;
import org.apache.pig.test.utils.Identity;
import org.apache.pig.test.utils.LogicalPlanTester;
@@ -93,6 +94,21 @@
new File(fileName).delete();
}
}
+
+
+ @Test
+ public void testJoinUnkownSchema() throws Exception {
+ // If any of the input schema is unknown, the resulting schema should be unknown as well
+ for (ExecType execType : execTypes) {
+ setUp(execType);
+ String script = "a = load 'a.txt';" +
+ "b = load 'b.txt'; " +
+ "c = join a by $0, b by $0;";
+ Util.registerMultiLineQuery(pigServer, script);
+ Schema schema = pigServer.dumpSchema("c");
+ assertTrue(schema == null);
+ }
+ }
@Test
public void testDefaultJoin() throws IOException, ParseException {
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java Fri Oct 2 17:09:48 2009
@@ -476,6 +476,6 @@
mjSch = pigServer.dumpSchema("C");
pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
shjSch = pigServer.dumpSchema("C");
- Assert.assertEquals(true, shjSch.equals(mjSch));
+ Assert.assertTrue(shjSch == null);
}
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java Fri Oct 2 17:09:48 2009
@@ -4970,19 +4970,7 @@
printTypeGraph(plan) ;
planTester.printPlan(plan, TypeCheckingTestUtil.getCurrentMethodName());
- if (collector.hasError()) {
- throw new AssertionError("Expect no error") ;
- }
-
- LOForEach foreach = (LOForEach)plan.getLeaves().get(0);
- LogicalPlan foreachPlan = foreach.getForEachPlans().get(0);
-
- LogicalOperator exOp = foreachPlan.getRoots().get(0);
-
- if(! (exOp instanceof LOProject)) exOp = foreachPlan.getRoots().get(1);
-
- LOCast cast = (LOCast)foreachPlan.getSuccessors(exOp).get(0);
- assertTrue(cast.getLoadFuncSpec().getClassName().startsWith("PigStorage"));
+ assertTrue(collector.hasError());
}
@Test
@@ -5043,19 +5031,7 @@
printTypeGraph(plan) ;
planTester.printPlan(plan, TypeCheckingTestUtil.getCurrentMethodName());
- if (collector.hasError()) {
- throw new AssertionError("Expect no error") ;
- }
-
- LOForEach foreach = (LOForEach)plan.getLeaves().get(0);
- LogicalPlan foreachPlan = foreach.getForEachPlans().get(0);
-
- LogicalOperator exOp = foreachPlan.getRoots().get(0);
-
- if(! (exOp instanceof LOProject)) exOp = foreachPlan.getRoots().get(1);
-
- LOCast cast = (LOCast)foreachPlan.getSuccessors(exOp).get(0);
- assertTrue(cast.getLoadFuncSpec().getClassName().startsWith("PigStorage"));
+ assertTrue(collector.hasError());
}
@Test