Author: sms
Date: Tue Mar 3 00:22:24 2009
New Revision: 749487

URL: http://svn.apache.org/viewvc?rev=749487&view=rev
Log:
PIG-684: outputSchema method in TOKENIZE is broken (thejas via sms)
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=749487&r1=749486&r2=749487&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Mar 3 00:22:24 2009 @@ -434,3 +434,5 @@ PIG-591: Error handling phase four (sms via pradeepkth) PIG-664: Semantics of * is not consistent (sms) + + PIG-684: outputSchema method in TOKENIZE is broken (thejas via sms) Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java?rev=749487&r1=749486&r2=749487&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java Tue Mar 3 00:22:24 2009 @@ -28,6 +28,7 @@ import org.apache.pig.data.DataType; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; +import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.logicalLayer.schema.Schema; @@ -58,8 +59,32 @@ @Override public Schema outputSchema(Schema input) { - Schema schema = new Schema(new Schema.FieldSchema("token", - DataType.CHARARRAY)); - return schema; + + try { + Schema.FieldSchema tokenFs = new Schema.FieldSchema("token", + DataType.CHARARRAY); + Schema tupleSchema = new Schema(tokenFs); + + Schema.FieldSchema tupleFs; + tupleFs = new Schema.FieldSchema("tuple_of_tokens", tupleSchema, + DataType.TUPLE); + + Schema bagSchema = new Schema(tupleFs); + bagSchema.setTwoLevelAccessRequired(true); + Schema.FieldSchema bagFs = new Schema.FieldSchema( + "bag_of_tokenTuples",bagSchema, DataType.BAG); + + 
return new Schema(bagFs); + + + + } catch (FrontendException e) { + // throwing RTE because + //above schema creation is not expected to throw an exception + // and also because superclass does not throw exception + throw new RuntimeException("Unable to compute TOKENIZE schema."); + } } -} + + +}; Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java?rev=749487&r1=749486&r2=749487&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java Tue Mar 3 00:22:24 2009 @@ -1879,6 +1879,31 @@ buildPlan(query); } + + @Test + public void testTokenizeSchema() throws FrontendException, ParseException { + LogicalPlan lp; + LOForEach foreach; + + buildPlan("a = load 'one' as (f1: chararray);"); + lp = buildPlan("b = foreach a generate TOKENIZE(f1);"); + foreach = (LOForEach) lp.getLeaves().get(0); + + Schema.FieldSchema tokenFs = new Schema.FieldSchema("token", + DataType.CHARARRAY); + Schema tupleSchema = new Schema(tokenFs); + + Schema.FieldSchema tupleFs; + tupleFs = new Schema.FieldSchema("tuple_of_tokens", tupleSchema, + DataType.TUPLE); + + Schema bagSchema = new Schema(tupleFs); + Schema.FieldSchema bagFs = new Schema.FieldSchema( + "bag_of_tokenTuples",bagSchema, DataType.BAG); + + assertTrue(Schema.equals(foreach.getSchema(), new Schema(bagFs), false, true)); + } + private void printPlan(LogicalPlan lp) { LOPrinter graphPrinter = new LOPrinter(System.err, lp); System.err.println("Printing the logical plan");