Author: olga Date: Tue Dec 15 01:26:29 2009 New Revision: 890596 URL: http://svn.apache.org/viewvc?rev=890596&view=rev Log: PIG-973: type resolution inconsistency (rding via olgan)
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=890596&r1=890595&r2=890596&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Dec 15 01:26:29 2009 @@ -51,6 +51,8 @@ BUG FIXES +PIG-973: type resolution inconsistency (rding via olgan) + PIG-1135: skewed join partitioner returns negative partition index (yinghe via olgan) Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java?rev=890596&r1=890595&r2=890596&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java Tue Dec 15 01:26:29 2009 @@ -1084,34 +1084,6 @@ } EvalFunc<?> ef = (EvalFunc<?>) PigContext.instantiateFuncFromSpec(func.getFuncSpec()); - - // If the function is algebraic and the project is just sentinel - // (special case when we apply aggregate on flattened members) - // then it will never match algebraic functions' schemas - // without this - - // Assuming all aggregates has only one argument at this stage - if(func.getArguments()!=null && func.getArguments().size()>0){ - ExpressionOperator tmpExp = func.getArguments().get(0) ; - if ( (ef instanceof Algebraic) - && (tmpExp instanceof LOProject) - && (((LOProject)tmpExp).getSentinel())) { - - FieldSchema tmpField ; - - try { - // embed the schema above inside a bag - tmpField = new FieldSchema(null, s, DataType.BAG) ; - } - catch (FrontendException e) { - int errCode = 1023; - String msg = "Unable to create new field schema."; - throw new TypeCheckerException(msg, errCode, PigException.INPUT, e) ; - } - - s = new Schema(tmpField) ; - } - } // ask the EvalFunc what types of inputs it can handle List<FuncSpec> funcSpecs = null; Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java?rev=890596&r1=890595&r2=890596&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java Tue Dec 15 01:26:29 2009 @@ -18,28 +18,32 @@ package org.apache.pig.test; +import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.test.utils.TestHelper; import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.parser.ParseException; import org.junit.Test; import junit.framework.TestCase; import junit.framework.Assert; import java.util.Iterator; +import java.util.List; import java.util.Random; import java.io.File; +import java.io.FileWriter; import java.io.IOException; +import java.io.PrintWriter; import java.text.DecimalFormat; public class TestForEachNestedPlan extends TestCase { - private String initString = "mapreduce"; MiniCluster cluster = MiniCluster.buildCluster(); private PigServer pig ; public TestForEachNestedPlan() throws Throwable { - pig = new PigServer(initString) ; + pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()) ; } Boolean[] nullFlags = new Boolean[]{ false, true }; @@ -47,9 +51,11 @@ @Test public void testInnerOrderBy() throws Exception { for (int i = 0; i < nullFlags.length; i++) { - System.err.println("Running testInnerOrderBy with nullFlags set to :" + nullFlags[i]); + System.err.println("Running testInnerOrderBy with nullFlags set to :" + + nullFlags[i]); File tmpFile = genDataSetFile1(nullFlags[i]); - pig.registerQuery("a = load '" + Util.generateURI(tmpFile.toString()) + "'; "); + pig.registerQuery("a = load '" + + Util.generateURI(tmpFile.toString()) + "'; "); pig.registerQuery("b = group a by $0; "); pig.registerQuery("c = foreach b { " + " c1 = order $1 by *; " + " generate flatten(c1); " + "};"); @@ -68,7 +74,8 @@ @Test public void testInnerOrderByStarWithSchema() throws Exception { File tmpFile = genDataSetFile1(false); - pig.registerQuery("a = load '" + Util.generateURI(tmpFile.toString()) + "' as (a0, a1);"); + pig.registerQuery("a = load '" + Util.generateURI(tmpFile.toString()) + + "' as (a0, a1);"); pig.registerQuery("b = group a by a0; "); pig.registerQuery("c = foreach b { d = order a by *; " + " generate group, d; };"); @@ -82,29 +89,101 @@ } Assert.assertEquals(count, 10); } - - /* + @Test - public void testInnerDistinct() throws Exception { - File tmpFile = genDataSetFile1() ; - pig.registerQuery("a = load 'file:" + tmpFile + "'; ") ; - pig.registerQuery("b = group a by $0; "); - pig.registerQuery("c = foreach b { " - + " c1 = distinct $1 ; " - + " generate flatten(c1); " - + "};") ; - Iterator<Tuple> it = pig.openIterator("c"); - Tuple t = null ; - int count = 0 ; - while(it.hasNext()) { - t = it.next() ; - System.out.println(count + ":" + t) ; - count++ ; + public void testAlgebricFuncWithoutGroupBy() + throws IOException, ParseException { + String INPUT_FILE = "test-sum.txt"; + + PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE)); + w.println("10\t{(1),(2),(3)}"); + w.println("20\t{(4),(5),(6),(7)}"); + w.println("30\t{(8),(9)}"); + w.close(); + + try { + + Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE); + + pig.registerQuery("a = load '" + INPUT_FILE + "' " + + "as (id:int, g:bag{t:tuple(u:int)});"); + pig.registerQuery("b = foreach a generate id, SUM(g);") ; + + Iterator<Tuple> iter = pig.openIterator("b"); + + List<Tuple> expectedResults = + Util.getTuplesFromConstantTupleStrings( + new String[] { + "(10,6L)", + "(20,22L)", + "(30,17L)" + }); + + int counter = 0; + while (iter.hasNext()) { + assertEquals(expectedResults.get(counter++).toString(), + iter.next().toString()); + } + + assertEquals(expectedResults.size(), counter); + + } finally{ + new File(INPUT_FILE).delete(); + try { + Util.deleteFile(cluster, INPUT_FILE); + } catch (IOException e) { + e.printStackTrace(); + Assert.fail(); + } } - Assert.assertEquals(count, 15); } - */ + @Test + public void testInnerDistinct() + throws IOException, ParseException { + String INPUT_FILE = "test-distinct.txt"; + + PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE)); + w.println("10\t89"); + w.println("20\t78"); + w.println("10\t68"); + w.println("10\t89"); + w.println("20\t92"); + w.close(); + + try { + Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE); + + pig.registerQuery("A = load '" + INPUT_FILE + + "' as (age:int, gpa:int);"); + pig.registerQuery("B = group A by age;"); + pig.registerQuery("C = foreach B { D = A.gpa; E = distinct D; " + + "generate group, MIN(E); };"); + + Iterator<Tuple> iter = pig.openIterator("C"); + + List<Tuple> expectedResults = + Util.getTuplesFromConstantTupleStrings( + new String[] {"(10,68)", "(20,78)"}); + + int counter = 0; + while (iter.hasNext()) { + assertEquals(expectedResults.get(counter++).toString(), + iter.next().toString()); + } + + assertEquals(expectedResults.size(), counter); + } finally{ + new File(INPUT_FILE).delete(); + try { + Util.deleteFile(cluster, INPUT_FILE); + } catch (IOException e) { + e.printStackTrace(); + Assert.fail(); + } + } + } + /*** * For generating a sample dataset */ Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java?rev=890596&r1=890595&r2=890596&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java Tue Dec 15 01:26:29 2009 @@ -18,8 +18,11 @@ package org.apache.pig.test; +import junit.framework.Assert; import junit.framework.TestCase; import org.apache.pig.impl.logicalLayer.*; +import org.apache.pig.impl.logicalLayer.validators.TypeCheckerException; +import org.apache.pig.impl.plan.PlanValidationException; import org.apache.pig.test.utils.TypeCheckingTestUtil; import org.apache.pig.test.utils.LogicalPlanTester; import org.junit.Test; @@ -247,7 +250,18 @@ planTester.buildPlan("a = load '/user/pig/tests/data/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);") ; LogicalPlan plan1 = planTester.buildPlan("b = foreach a generate (long)age as age:long, (int)gpa as gpa:int;") ; LogicalPlan plan2 = planTester.buildPlan("c = foreach b generate SUM(age), SUM(gpa);") ; - planTester.typeCheckPlan(plan2); + try { + planTester.typeCheckPlan(plan2); + } catch (PlanValidationException e) { + Throwable t = e.getCause(); + if (!(t instanceof TypeCheckerException)) { + System.out.println("t: " + t); + Assert.fail(); + } + return; + } + // shouldn't get here + Assert.fail(); } public void testSUM2() throws Throwable {