Author: olga
Date: Tue Dec 15 01:26:29 2009
New Revision: 890596
URL: http://svn.apache.org/viewvc?rev=890596&view=rev
Log:
PIG-973: type resolution inconsistency (rding via olgan)
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java
hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java
hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=890596&r1=890595&r2=890596&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Tue Dec 15 01:26:29 2009
@@ -51,6 +51,8 @@
BUG FIXES
+PIG-973: type resolution inconsistency (rding via olgan)
+
PIG-1135: skewed join partitioner returns negative partition index (yinghe
via olgan)
Modified:
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java?rev=890596&r1=890595&r2=890596&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java Tue Dec 15 01:26:29 2009
@@ -1084,34 +1084,6 @@
}
EvalFunc<?> ef = (EvalFunc<?>) PigContext.instantiateFuncFromSpec(func.getFuncSpec());
-
- // If the function is algebraic and the project is just sentinel
- // (special case when we apply aggregate on flattened members)
- // then it will never match algebraic functions' schemas
- // without this
-
- // Assuming all aggregates has only one argument at this stage
- if(func.getArguments()!=null && func.getArguments().size()>0){
- ExpressionOperator tmpExp = func.getArguments().get(0) ;
- if ( (ef instanceof Algebraic)
- && (tmpExp instanceof LOProject)
- && (((LOProject)tmpExp).getSentinel())) {
-
- FieldSchema tmpField ;
-
- try {
- // embed the schema above inside a bag
- tmpField = new FieldSchema(null, s, DataType.BAG) ;
- }
- catch (FrontendException e) {
- int errCode = 1023;
- String msg = "Unable to create new field schema.";
- throw new TypeCheckerException(msg, errCode, PigException.INPUT, e) ;
- }
-
- s = new Schema(tmpField) ;
- }
- }
// ask the EvalFunc what types of inputs it can handle
List<FuncSpec> funcSpecs = null;
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java?rev=890596&r1=890595&r2=890596&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestForEachNestedPlan.java Tue Dec 15 01:26:29 2009
@@ -18,28 +18,32 @@
package org.apache.pig.test;
+import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.test.utils.TestHelper;
import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.parser.ParseException;
import org.junit.Test;
import junit.framework.TestCase;
import junit.framework.Assert;
import java.util.Iterator;
+import java.util.List;
import java.util.Random;
import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
+import java.io.PrintWriter;
import java.text.DecimalFormat;
public class TestForEachNestedPlan extends TestCase {
- private String initString = "mapreduce";
MiniCluster cluster = MiniCluster.buildCluster();
private PigServer pig ;
public TestForEachNestedPlan() throws Throwable {
- pig = new PigServer(initString) ;
+ pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()) ;
}
Boolean[] nullFlags = new Boolean[]{ false, true };
@@ -47,9 +51,11 @@
@Test
public void testInnerOrderBy() throws Exception {
for (int i = 0; i < nullFlags.length; i++) {
- System.err.println("Running testInnerOrderBy with nullFlags set to :" + nullFlags[i]);
+ System.err.println("Running testInnerOrderBy with nullFlags set to :"
+ + nullFlags[i]);
File tmpFile = genDataSetFile1(nullFlags[i]);
- pig.registerQuery("a = load '" + Util.generateURI(tmpFile.toString()) + "'; ");
+ pig.registerQuery("a = load '"
+ + Util.generateURI(tmpFile.toString()) + "'; ");
pig.registerQuery("b = group a by $0; ");
pig.registerQuery("c = foreach b { " + " c1 = order $1 by *; "
+ " generate flatten(c1); " + "};");
@@ -68,7 +74,8 @@
@Test
public void testInnerOrderByStarWithSchema() throws Exception {
File tmpFile = genDataSetFile1(false);
- pig.registerQuery("a = load '" + Util.generateURI(tmpFile.toString()) + "' as (a0, a1);");
+ pig.registerQuery("a = load '" + Util.generateURI(tmpFile.toString())
+ + "' as (a0, a1);");
pig.registerQuery("b = group a by a0; ");
pig.registerQuery("c = foreach b { d = order a by *; "
+ " generate group, d; };");
@@ -82,29 +89,101 @@
}
Assert.assertEquals(count, 10);
}
-
- /*
+
@Test
- public void testInnerDistinct() throws Exception {
- File tmpFile = genDataSetFile1() ;
- pig.registerQuery("a = load 'file:" + tmpFile + "'; ") ;
- pig.registerQuery("b = group a by $0; ");
- pig.registerQuery("c = foreach b { "
- + " c1 = distinct $1 ; "
- + " generate flatten(c1); "
- + "};") ;
- Iterator<Tuple> it = pig.openIterator("c");
- Tuple t = null ;
- int count = 0 ;
- while(it.hasNext()) {
- t = it.next() ;
- System.out.println(count + ":" + t) ;
- count++ ;
+ public void testAlgebricFuncWithoutGroupBy()
+ throws IOException, ParseException {
+ String INPUT_FILE = "test-sum.txt";
+
+ PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE));
+ w.println("10\t{(1),(2),(3)}");
+ w.println("20\t{(4),(5),(6),(7)}");
+ w.println("30\t{(8),(9)}");
+ w.close();
+
+ try {
+
+ Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE);
+
+ pig.registerQuery("a = load '" + INPUT_FILE + "' "
+ + "as (id:int, g:bag{t:tuple(u:int)});");
+ pig.registerQuery("b = foreach a generate id, SUM(g);") ;
+
+ Iterator<Tuple> iter = pig.openIterator("b");
+
+ List<Tuple> expectedResults =
+ Util.getTuplesFromConstantTupleStrings(
+ new String[] {
+ "(10,6L)",
+ "(20,22L)",
+ "(30,17L)"
+ });
+
+ int counter = 0;
+ while (iter.hasNext()) {
+ assertEquals(expectedResults.get(counter++).toString(),
+ iter.next().toString());
+ }
+
+ assertEquals(expectedResults.size(), counter);
+
+ } finally{
+ new File(INPUT_FILE).delete();
+ try {
+ Util.deleteFile(cluster, INPUT_FILE);
+ } catch (IOException e) {
+ e.printStackTrace();
+ Assert.fail();
+ }
}
- Assert.assertEquals(count, 15);
}
- */
+ @Test
+ public void testInnerDistinct()
+ throws IOException, ParseException {
+ String INPUT_FILE = "test-distinct.txt";
+
+ PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE));
+ w.println("10\t89");
+ w.println("20\t78");
+ w.println("10\t68");
+ w.println("10\t89");
+ w.println("20\t92");
+ w.close();
+
+ try {
+ Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE);
+
+ pig.registerQuery("A = load '" + INPUT_FILE
+ + "' as (age:int, gpa:int);");
+ pig.registerQuery("B = group A by age;");
+ pig.registerQuery("C = foreach B { D = A.gpa; E = distinct D; " +
+ "generate group, MIN(E); };");
+
+ Iterator<Tuple> iter = pig.openIterator("C");
+
+ List<Tuple> expectedResults =
+ Util.getTuplesFromConstantTupleStrings(
+ new String[] {"(10,68)", "(20,78)"});
+
+ int counter = 0;
+ while (iter.hasNext()) {
+ assertEquals(expectedResults.get(counter++).toString(),
+ iter.next().toString());
+ }
+
+ assertEquals(expectedResults.size(), counter);
+ } finally{
+ new File(INPUT_FILE).delete();
+ try {
+ Util.deleteFile(cluster, INPUT_FILE);
+ } catch (IOException e) {
+ e.printStackTrace();
+ Assert.fail();
+ }
+ }
+ }
+
/***
* For generating a sample dataset
*/
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java?rev=890596&r1=890595&r2=890596&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java Tue Dec 15 01:26:29 2009
@@ -18,8 +18,11 @@
package org.apache.pig.test;
+import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.pig.impl.logicalLayer.*;
+import org.apache.pig.impl.logicalLayer.validators.TypeCheckerException;
+import org.apache.pig.impl.plan.PlanValidationException;
import org.apache.pig.test.utils.TypeCheckingTestUtil;
import org.apache.pig.test.utils.LogicalPlanTester;
import org.junit.Test;
@@ -247,7 +250,18 @@
planTester.buildPlan("a = load '/user/pig/tests/data/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);") ;
LogicalPlan plan1 = planTester.buildPlan("b = foreach a generate (long)age as age:long, (int)gpa as gpa:int;") ;
LogicalPlan plan2 = planTester.buildPlan("c = foreach b generate SUM(age), SUM(gpa);") ;
- planTester.typeCheckPlan(plan2);
+ try {
+ planTester.typeCheckPlan(plan2);
+ } catch (PlanValidationException e) {
+ Throwable t = e.getCause();
+ if (!(t instanceof TypeCheckerException)) {
+ System.out.println("t: " + t);
+ Assert.fail();
+ }
+ return;
+ }
+ // shouldn't get here
+ Assert.fail();
}
public void testSUM2() throws Throwable {