Author: daijy
Date: Fri Oct  2 17:09:48 2009
New Revision: 821101

URL: http://svn.apache.org/viewvc?rev=821101&view=rev
Log:
PIG-592: schema inferred incorrectly

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri Oct  2 17:09:48 2009
@@ -32,6 +32,8 @@
 
 PIG-948: [Usability] Relating pig script with MR jobs (ashutoshc via daijy)
 
+PIG-960: Using Hadoop's optimized LineRecordReader for reading Tuples in PigStorage ( ankit.modi via daijy)
+
 OPTIMIZATIONS
 
 BUG FIXES
@@ -46,6 +48,8 @@
 PIG-951:  Set parallelism explicitly to 1 for indexing job in merge join
           (ashutoc via gates).
 
+PIG-592: schema inferred incorrectly (daijy)
+
 Release 0.5.0 - Unreleased
 
 INCOMPATIBLE CHANGES
@@ -69,8 +73,6 @@
 
 IMPROVEMENTS
 
-PIG-960: Using Hadoop's optimized LineRecordReader for reading Tuples in PigStorage ( ankit.modi via daijy)
-
 PIG-938: documentation changes for Pig 0.4.0 release (chandec via olgan)
 
 PIG-578: join ... outer, ... outer semantics are a no-ops, should produce

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java Fri Oct  2 17:09:48 2009
@@ -134,6 +134,7 @@
         if(!mIsSchemaComputed){
             List<Schema.FieldSchema> fss = new ArrayList<Schema.FieldSchema>();
             int i=-1;
+            boolean seeUnknown = false;
             for (LogicalOperator op : inputs) {
                 try {
                     Schema cSchema = op.getSchema();
@@ -159,9 +160,7 @@
                             fss.add(newFS);
                         }
                     } else {
-                        FieldSchema newFS = new FieldSchema(null, DataType.BYTEARRAY);
-                        newFS.setParent(null, op);
-                        fss.add(newFS);
+                        seeUnknown = true;
                     }
                 } catch (FrontendException ioe) {
                     mIsSchemaComputed = false;
@@ -170,19 +169,22 @@
                 }
             }
             mIsSchemaComputed = true;
-            mSchema = new Schema(fss);
-            for (Entry<String, Integer> ent : nonDuplicates.entrySet()) {
-                int ind = ent.getValue();
-                if(ind==-1) continue;
-                FieldSchema prevSch = fss.get(ind);
-                // this is a non duplicate and hence can be referred to
-                // with just the field schema alias or outeralias::field schema alias
-                // In mSchema we have outeralias::fieldschemaalias. To allow
-                // using just the field schema alias, add it to mSchemas
-                // as an alias for this field.
-                mSchema.addAlias(ent.getKey(), prevSch);
+            mSchema = null;
+            if (!seeUnknown)
+            {
+                mSchema = new Schema(fss);
+                for (Entry<String, Integer> ent : nonDuplicates.entrySet()) {
+                    int ind = ent.getValue();
+                    if(ind==-1) continue;
+                    FieldSchema prevSch = fss.get(ind);
+                    // this is a non duplicate and hence can be referred to
+                    // with just the field schema alias or outeralias::field schema alias
+                    // In mSchema we have outeralias::fieldschemaalias. To allow
+                    // using just the field schema alias, add it to mSchemas
+                    // as an alias for this field.
+                    mSchema.addAlias(ent.getKey(), prevSch);
+                }
             }
-            
         }
         return mSchema;
     }

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestFRJoin.java Fri Oct  2 17:09:48 2009
@@ -378,7 +378,7 @@
         frjSch = pigServer.dumpSchema("C");
         pigServer.registerQuery("C = join A by $0, B by $0;");
         shjSch = pigServer.dumpSchema("C");
-        Assert.assertEquals(true, shjSch.equals(frjSch));
+        Assert.assertTrue(shjSch == null);
     }
     
     @Test
@@ -404,7 +404,7 @@
         frjSch = pigServer.dumpSchema("D");
         pigServer.registerQuery("D = join A by $0, B by $0, C by $0;");
         shjSch = pigServer.dumpSchema("D");
-        Assert.assertEquals(true, shjSch.equals(frjSch));
+        Assert.assertTrue(shjSch == null);
     }
     
     @Test
@@ -428,6 +428,6 @@
         frjSch = pigServer.dumpSchema("C");
         pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
         shjSch = pigServer.dumpSchema("C");
-        Assert.assertEquals(true, shjSch.equals(frjSch));
+        Assert.assertTrue(shjSch == null);
     }
 }

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Fri Oct  2 17:09:48 2009
@@ -33,6 +33,7 @@
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.io.FileLocalizer;
 import org.apache.pig.impl.logicalLayer.parser.ParseException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
 import org.apache.pig.impl.util.LogUtils;
 import org.apache.pig.test.utils.Identity;
 import org.apache.pig.test.utils.LogicalPlanTester;
@@ -93,6 +94,21 @@
             new File(fileName).delete();
         }
     }
+
+    
+    @Test
+    public void testJoinUnkownSchema() throws Exception {
+        // If any of the input schema is unknown, the resulting schema should be unknown as well
+        for (ExecType execType : execTypes) {
+            setUp(execType);
+            String script = "a = load 'a.txt';" +
+                    "b = load 'b.txt'; " +
+                    "c = join a by $0, b by $0;";
+            Util.registerMultiLineQuery(pigServer, script);
+            Schema schema = pigServer.dumpSchema("c");
+            assertTrue(schema == null);
+        }
+    }
     
     @Test
     public void testDefaultJoin() throws IOException, ParseException {

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java Fri Oct  2 17:09:48 2009
@@ -476,6 +476,6 @@
         mjSch = pigServer.dumpSchema("C");
         pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
         shjSch = pigServer.dumpSchema("C");
-        Assert.assertEquals(true, shjSch.equals(mjSch));
+        Assert.assertTrue(shjSch == null);
     }
 }

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java?rev=821101&r1=821100&r2=821101&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java Fri Oct  2 17:09:48 2009
@@ -4970,19 +4970,7 @@
         printTypeGraph(plan) ;
         planTester.printPlan(plan, TypeCheckingTestUtil.getCurrentMethodName());
 
-        if (collector.hasError()) {
-            throw new AssertionError("Expect no error") ;
-        }
-
-        LOForEach foreach = (LOForEach)plan.getLeaves().get(0);
-        LogicalPlan foreachPlan = foreach.getForEachPlans().get(0);
-
-        LogicalOperator exOp = foreachPlan.getRoots().get(0);
-
-        if(! (exOp instanceof LOProject)) exOp = foreachPlan.getRoots().get(1);
-
-        LOCast cast = (LOCast)foreachPlan.getSuccessors(exOp).get(0);
-        assertTrue(cast.getLoadFuncSpec().getClassName().startsWith("PigStorage"));
+        assertTrue(collector.hasError());
     }
 
     @Test
@@ -5043,19 +5031,7 @@
         printTypeGraph(plan) ;
         planTester.printPlan(plan, TypeCheckingTestUtil.getCurrentMethodName());
 
-        if (collector.hasError()) {
-            throw new AssertionError("Expect no error") ;
-        }
-
-        LOForEach foreach = (LOForEach)plan.getLeaves().get(0);
-        LogicalPlan foreachPlan = foreach.getForEachPlans().get(0);
-
-        LogicalOperator exOp = foreachPlan.getRoots().get(0);
-
-        if(! (exOp instanceof LOProject)) exOp = foreachPlan.getRoots().get(1);
-
-        LOCast cast = (LOCast)foreachPlan.getSuccessors(exOp).get(0);
-        assertTrue(cast.getLoadFuncSpec().getClassName().startsWith("PigStorage"));
+        assertTrue(collector.hasError());
     }
 
     @Test


Reply via email to