Author: rding
Date: Fri Jan 22 22:21:36 2010
New Revision: 902295
URL: http://svn.apache.org/viewvc?rev=902295&view=rev
Log:
PIG-1090: Update sources to reflect recent changes in load-store interfaces
Modified:
hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java
Modified:
hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java?rev=902295&r1=902294&r2=902295&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/TestPigStorageSchema.java Fri Jan 22 22:21:36 2010
@@ -19,13 +19,19 @@
package org.apache.pig.piggybank.test;
+import static org.junit.Assert.assertEquals;
+
import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
+import org.apache.pig.ResourceSchema;
import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
@@ -108,4 +114,80 @@
assertTrue("explicit schema overrides metadata",
Schema.equals(newSchema, newGenSchema, true, false));
}
+
+ @Test
+ public void testSchemaConversion() throws Exception {
+
+ Util.createInputFile(cluster, "originput2",
+ new String[] {"1", "2", "3", "2",
+ "5", "5", "8", "8",
+ "8", "9"});
+
+ pig.registerQuery("A = LOAD 'originput2' using org.apache.pig.piggybank.storage.PigStorageSchema() as (f:int);");
+ pig.registerQuery("B = group A by f;");
+ Schema origSchema = pig.dumpSchema("B");
+ ResourceSchema rs1 = new ResourceSchema(origSchema);
+ pig.registerQuery("STORE B into 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
+
+ pig.registerQuery("C = LOAD 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
+ Schema genSchema = pig.dumpSchema("C");
+ ResourceSchema rs2 = new ResourceSchema(genSchema);
+ assertTrue("generated schema equals original" , ResourceSchema.equals(rs1, rs2));
+
+ pig.registerQuery("C1 = LOAD 'bout' as (a0:int, A: {t: (f:int) } );");
+ pig.registerQuery("D = foreach C1 generate a0, SUM(A);");
+
+ List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings(
+ new String[] {
+ "(1,1L)",
+ "(2,4L)",
+ "(3,3L)",
+ "(5,10L)",
+ "(8,24L)",
+ "(9,9L)"
+ });
+
+ Iterator<Tuple> iter = pig.openIterator("D");
+ int counter = 0;
+ while (iter.hasNext()) {
+ assertEquals(expectedResults.get(counter++).toString(), iter.next().toString());
+ }
+
+ assertEquals(expectedResults.size(), counter);
+ }
+
+ @Test
+ public void testSchemaConversion2() throws Exception {
+
+ pig.registerQuery("A = LOAD 'originput' using org.apache.pig.piggybank.storage.PigStorageSchema(',') as (f1:chararray, f2:int);");
+ pig.registerQuery("B = group A by f1;");
+ Schema origSchema = pig.dumpSchema("B");
+ ResourceSchema rs1 = new ResourceSchema(origSchema);
+ pig.registerQuery("STORE B into 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
+
+ pig.registerQuery("C = LOAD 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
+ Schema genSchema = pig.dumpSchema("C");
+ ResourceSchema rs2 = new ResourceSchema(genSchema);
+ assertTrue("generated schema equals original" , ResourceSchema.equals(rs1, rs2));
+
+ pig.registerQuery("C1 = LOAD 'bout' as (a0:chararray, A: {t: (f1:chararray, f2:int) } );");
+ pig.registerQuery("D = foreach C1 generate a0, SUM(A.f2);");
+
+ List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings(
+ new String[] {
+ "('A',23L)",
+ "('B',7L)",
+ "('C',11L)",
+ "('D',10L)"
+ });
+
+ Iterator<Tuple> iter = pig.openIterator("D");
+ int counter = 0;
+ while (iter.hasNext()) {
+ assertEquals(expectedResults.get(counter++).toString(), iter.next().toString());
+ }
+
+ assertEquals(expectedResults.size(), counter);
+ }
+
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java?rev=902295&r1=902294&r2=902295&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java Fri Jan 22 22:21:36 2010
@@ -22,6 +22,8 @@
import java.util.Arrays;
import java.util.List;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
@@ -29,6 +31,9 @@
public class ResourceSchema implements Serializable {
private static final long serialVersionUID = 1L;
+
+ private static Log log = LogFactory.getLog(ResourceSchema.class);
+
/* Array Getters intentionally return mutable arrays instead of copies,
* to simplify updates without unnecessary copying.
* Setters make a copy of the arrays in order to prevent an array
@@ -66,16 +71,18 @@
type = fieldSchema.type;
name = fieldSchema.alias;
description = "autogenerated from Pig Field Schema";
- if (type == DataType.BAG && fieldSchema.schema != null) { // allow partial schema
- List<FieldSchema> slst = fieldSchema.schema.getFields();
- if (slst.size() != 1 || slst.get(0).type != DataType.TUPLE) {
- throw new IllegalArgumentException("Invalid Pig schema: " +
- "bag schema must have tuple as its field.");
- }
+ Schema inner = fieldSchema.schema;
+ if (type == DataType.BAG && fieldSchema.schema != null
+ && !fieldSchema.schema.isTwoLevelAccessRequired()) {
+ log.info("Insert two-level access to Resource Schema");
+ FieldSchema fs = new FieldSchema("t", fieldSchema.schema);
+ inner = new Schema(fs);
}
- // XXX allow partial schema
- if (type == DataType.BAG || type == DataType.TUPLE) {
- schema = new ResourceSchema(fieldSchema.schema);
+
+ // allow partial schema
+ if ((type == DataType.BAG || type == DataType.TUPLE)
+ && inner != null) {
+ schema = new ResourceSchema(inner);
} else {
schema = null;
}
Modified:
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java?rev=902295&r1=902294&r2=902295&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java (original)
+++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestResourceSchema.java Fri Jan 22 22:21:36 2010
@@ -194,9 +194,9 @@
}
/**
- * Test invalid Pig Schema: multiple fields for a bag
+ * Test one-level Pig Schema: multiple fields for a bag
*/
- @Test(expected=IllegalArgumentException.class)
+ @Test
public void testResourceSchemaWithInvalidPigSchema()
throws FrontendException {
String [] aliases ={"f1", "f2"};
@@ -206,13 +206,14 @@
Schema.FieldSchema fld0 =
new Schema.FieldSchema("f0", level0, DataType.BAG);
Schema level1 = new Schema(fld0);
- new ResourceSchema(level1);
+ Schema genSchema = Schema.getPigSchema(new ResourceSchema(level1));
+ assertTrue(CheckTwoLevelAccess(genSchema));
}
/**
- * Test invalid Pig Schema: bag without tuple field
+ * Test one-level Pig Schema: bag without tuple field
*/
- @Test(expected=IllegalArgumentException.class)
+ @Test
public void testResourceSchemaWithInvalidPigSchema2()
throws FrontendException {
String [] aliases ={"f1"};
@@ -222,6 +223,7 @@
Schema.FieldSchema fld0 =
new Schema.FieldSchema("f0", level0, DataType.BAG);
Schema level1 = new Schema(fld0);
- new ResourceSchema(level1);
+ Schema genSchema = Schema.getPigSchema(new ResourceSchema(level1));
+ assertTrue(CheckTwoLevelAccess(genSchema));
}
}