rding
Fri, 22 Jan 2010 14:22:07 -0800
Author: rding Date: Fri Jan 22 22:21:36 2010 New Revision: 902295 URL: http://svn.apache.org/viewvc?rev=902295&view=rev Log: PIG-1090: Update sources to reflect recent changes in load-store interfaces Modified: hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java Modified: hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java?rev=902295&r1=902294&r2=902295&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java (original) +++ hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java Fri Jan 22 22:21:36 2010 @@ -19,13 +19,19 @@ package org.apache.pig.piggybank.test; +import static org.junit.Assert.assertEquals; + import java.util.HashMap; +import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Properties; import org.apache.pig.ExecType; import org.apache.pig.PigServer; +import org.apache.pig.ResourceSchema; import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.io.FileLocalizer; import org.apache.pig.impl.logicalLayer.LogicalOperator; @@ -108,4 +114,80 @@ assertTrue("explicit schema overrides metadata", Schema.equals(newSchema, newGenSchema, true, false)); } + + @Test + public void testSchemaConversion() throws Exception { + + Util.createInputFile(cluster, "originput2", + new String[] {"1", "2", "3", "2", + "5", "5", "8", "8", + "8", "9"}); + + pig.registerQuery("A = LOAD 'originput2' using org.apache.pig.piggybank.storage.PigStorageSchema() as (f:int);"); + pig.registerQuery("B = group A by f;"); + Schema origSchema = pig.dumpSchema("B"); + ResourceSchema rs1 = new ResourceSchema(origSchema); + pig.registerQuery("STORE B into 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();"); + + pig.registerQuery("C = LOAD 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();"); + Schema genSchema = pig.dumpSchema("C"); + ResourceSchema rs2 = new ResourceSchema(genSchema); + assertTrue("generated schema equals original" , ResourceSchema.equals(rs1, rs2)); + + pig.registerQuery("C1 = LOAD 'bout' as (a0:int, A: {t: (f:int) } );"); + pig.registerQuery("D = foreach C1 generate a0, SUM(A);"); + + List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings( + new String[] { + "(1,1L)", + "(2,4L)", + "(3,3L)", + "(5,10L)", + "(8,24L)", + "(9,9L)" + }); + + Iterator<Tuple> iter = pig.openIterator("D"); + int counter = 0; + while (iter.hasNext()) { + assertEquals(expectedResults.get(counter++).toString(), iter.next().toString()); + } + + assertEquals(expectedResults.size(), counter); + } + + @Test + public void testSchemaConversion2() throws Exception { + + pig.registerQuery("A = LOAD 'originput' using org.apache.pig.piggybank.storage.PigStorageSchema(',') as (f1:chararray, f2:int);"); + pig.registerQuery("B = group A by f1;"); + Schema origSchema = pig.dumpSchema("B"); + ResourceSchema rs1 = new ResourceSchema(origSchema); + pig.registerQuery("STORE B into 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();"); + + pig.registerQuery("C = LOAD 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();"); + Schema genSchema = pig.dumpSchema("C"); + ResourceSchema rs2 = new ResourceSchema(genSchema); + assertTrue("generated schema equals original" , ResourceSchema.equals(rs1, rs2)); + + pig.registerQuery("C1 = LOAD 'bout' as (a0:chararray, A: {t: (f1:chararray, f2:int) } );"); + pig.registerQuery("D = foreach C1 generate a0, SUM(A.f2);"); + + List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings( + new String[] { + "('A',23L)", + "('B',7L)", + "('C',11L)", + "('D',10L)" + }); + + Iterator<Tuple> iter = pig.openIterator("D"); + int counter = 0; + while (iter.hasNext()) { + assertEquals(expectedResults.get(counter++).toString(), iter.next().toString()); + } + + assertEquals(expectedResults.size(), counter); + } + } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java?rev=902295&r1=902294&r2=902295&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java Fri Jan 22 22:21:36 2010 @@ -22,6 +22,8 @@ import java.util.Arrays; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pig.data.DataType; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; @@ -29,6 +31,9 @@ public class ResourceSchema implements Serializable { private static final long serialVersionUID = 1L; + + private static Log log = LogFactory.getLog(ResourceSchema.class); + /* Array Getters intentionally return mutable arrays instead of copies, * to simplify updates without unnecessary copying. * Setters make a copy of the arrays in order to prevent an array @@ -66,16 +71,18 @@ type = fieldSchema.type; name = fieldSchema.alias; description = "autogenerated from Pig Field Schema"; - if (type == DataType.BAG && fieldSchema.schema != null) { // allow partial schema - List<FieldSchema> slst = fieldSchema.schema.getFields(); - if (slst.size() != 1 || slst.get(0).type != DataType.TUPLE) { - throw new IllegalArgumentException("Invalid Pig schema: " + - "bag schema must have tuple as its field."); - } + Schema inner = fieldSchema.schema; + if (type == DataType.BAG && fieldSchema.schema != null + && !fieldSchema.schema.isTwoLevelAccessRequired()) { + log.info("Insert two-level access to Resource Schema"); + FieldSchema fs = new FieldSchema("t", fieldSchema.schema); + inner = new Schema(fs); } - // XXX allow partial schema - if (type == DataType.BAG || type == DataType.TUPLE) { - schema = new ResourceSchema(fieldSchema.schema); + + // allow partial schema + if ((type == DataType.BAG || type == DataType.TUPLE) + && inner != null) { + schema = new ResourceSchema(inner); } else { schema = null; } Modified: hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java?rev=902295&r1=902294&r2=902295&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java (original) +++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java Fri Jan 22 22:21:36 2010 @@ -194,9 +194,9 @@ } /** - * Test invalid Pig Schema: multiple fields for a bag + * Test one-level Pig Schema: multiple fields for a bag */ - @Test(expected=IllegalArgumentException.class) + @Test public void testResourceSchemaWithInvalidPigSchema() throws FrontendException { String [] aliases ={"f1", "f2"}; @@ -206,13 +206,14 @@ Schema.FieldSchema fld0 = new Schema.FieldSchema("f0", level0, DataType.BAG); Schema level1 = new Schema(fld0); - new ResourceSchema(level1); + Schema genSchema = Schema.getPigSchema(new ResourceSchema(level1)); + assertTrue(CheckTwoLevelAccess(genSchema)); } /** - * Test invalid Pig Schema: bag without tuple field + * Test one-level Pig Schema: bag without tuple field */ - @Test(expected=IllegalArgumentException.class) + @Test public void testResourceSchemaWithInvalidPigSchema2() throws FrontendException { String [] aliases ={"f1"}; @@ -222,6 +223,7 @@ Schema.FieldSchema fld0 = new Schema.FieldSchema("f0", level0, DataType.BAG); Schema level1 = new Schema(fld0); - new ResourceSchema(level1); + Schema genSchema = Schema.getPigSchema(new ResourceSchema(level1)); + assertTrue(CheckTwoLevelAccess(genSchema)); } }