svn commit: r884353 - in /hadoop/pig/branches/branch-0.6/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/schema/ src/test/org/apache/hadoop/zebra/types/

gates Wed, 25 Nov 2009 17:09:25 -0800

Author: gates
Date: Thu Nov 26 01:08:59 2009
New Revision: 884353

URL: http://svn.apache.org/viewvc?rev=884353&view=rev
Log:
PIG-1095: Schema support of anonymous fields in COLLECTION fails.



Added:
    
hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java
Modified:
    hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt
    
hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java
    
hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt
    
hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java

Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt?rev=884353&r1=884352&r2=884353&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt Thu Nov 26 
01:08:59 2009
@@ -26,6 +26,8 @@
   OPTIMIZATIONS
 
   BUG FIXES
+    PIG-1095: Schema support of anonymous fields in COLECTION fails (yanz via
+                               gates)
 
     PIG-1078: merge join with empty table failed (yanz via gates)
 

Modified: 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java?rev=884353&r1=884352&r2=884353&view=diff
==============================================================================
--- 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java
 (original)
+++ 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java
 Thu Nov 26 01:08:59 2009
@@ -332,7 +332,7 @@
 
   private ArrayList<ColumnSchema> mFields;
   private HashMap<String, ColumnSchema> mNames;
-  private boolean dupColNameAllowed;
+  private boolean projection;
 
   /**
    * Constructor - schema for empty schema (zero-column) .
@@ -342,6 +342,17 @@
   }
 
   /**
+   * Constructor - schema for empty projection/schema (zero-column) .
+   *
+   * @param projection
+   *           A projection schema or not
+   */
+  public Schema(boolean projection) {
+    this.projection = projection;
+    init();
+  }
+
+  /**
    * Constructor - create a schema from a string representation.
    * 
    * @param schema
@@ -355,10 +366,9 @@
     init(schema, false);
   }
 
-  public Schema(String schema, boolean dupAllowed) throws ParseException {
-    dupColNameAllowed = dupAllowed;
-    // suppose if duplicate is allowed, then it's from projection and hence 
virtual column is allowed
-    init(schema, dupAllowed);
+  public Schema(String schema, boolean projection) throws ParseException {
+    this.projection = projection;
+    init(schema, projection);
   }
 
   public Schema(ColumnSchema fs) throws ParseException {
@@ -384,18 +394,16 @@
    *          Column to be added to the schema
    */
   public void add(ColumnSchema f) throws ParseException {
-    add(f, false);
-  }
-
-  private void add(ColumnSchema f, boolean dupAllowed) throws ParseException {
     if (f == null) {
+      if (!projection)
+        throw new ParseException("Empty column schema is not allowed");
       mFields.add(null);
       return;
     }
     f.index = mFields.size();
     mFields.add(f);
     if (null != f && null != f.name) {
-      if (mNames.put(f.name, f) != null && !dupAllowed && !dupColNameAllowed)
+      if (mNames.put(f.name, f) != null && !projection)
         throw new ParseException("Duplicate field name: " + f.name);
     }
   }
@@ -684,7 +692,7 @@
     org.apache.hadoop.zebra.tfile.Utils.writeString(out, toString());
   }
 
-  private void init(String[] columnNames, boolean virtualColAllowed) throws 
ParseException {
+  private void init(String[] columnNames, boolean projection) throws 
ParseException {
     // the arg must be of type or they will be treated as the default type
     mFields = new ArrayList<ColumnSchema>();
     mNames = new HashMap<String, ColumnSchema>();
@@ -698,7 +706,10 @@
     }
     TableSchemaParser parser =
         new TableSchemaParser(new StringReader(sb.toString()));
-    parser.RecordSchema(this, virtualColAllowed);
+    if (projection)
+      parser.ProjectionSchema(this);
+    else
+      parser.RecordSchema(this);
   }
 
   private void init() {
@@ -706,7 +717,7 @@
     mNames = new HashMap<String, ColumnSchema>();
   }
 
-  private void init(String columnString, boolean virtualColAllowed) throws 
ParseException {
+  private void init(String columnString, boolean projection) throws 
ParseException {
     String trimmedColumnStr;
     if (columnString == null || (trimmedColumnStr = 
columnString.trim()).isEmpty()) {
       init();
@@ -717,7 +728,7 @@
     for (int nx = 0; nx < parts.length; nx++) {
       parts[nx] = parts[nx].trim();
     }
-    init(parts, virtualColAllowed);
+    init(parts, projection);
   }
 
   /**
@@ -727,7 +738,7 @@
       HashMap<Schema.ColumnSchema, HashSet<String>> keysmap)
       throws ParseException {
     int ncols = projcols.length;
-    Schema result = new Schema();
+    Schema result = new Schema(true);
     ColumnSchema cs, mycs;
     String keysStr;
     String[] keys;
@@ -742,7 +753,7 @@
       pn.setName(projcols[i]);
       if ((cs = getColumnSchemaOnParsedName(pn)) != null) {
         mycs = new ColumnSchema(pn.mName, cs.schema, cs.type);
-        result.add(mycs, true);
+        result.add(mycs);
         if (pn.mDT == ColumnType.MAP) {
           keysStr = projcols[i].substring(pn.mKeyOffset);
           if (!keysStr.startsWith("{") || !keysStr.endsWith("}"))

Modified: 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt?rev=884353&r1=884352&r2=884353&view=diff
==============================================================================
--- 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt
 (original)
+++ 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt
 Thu Nov 26 01:08:59 2009
@@ -40,10 +40,6 @@
                                        objout.close();
                                        System.out.println(schema.toString());
       }
-      public Schema RecordSchema(Schema list) throws ParseException
-      {
-        return RecordSchema(list, false);
-      }
 }
 PARSER_END(TableSchemaParser)
 
@@ -121,16 +117,36 @@
 }
 {
        (
-       LOOKAHEAD(SchemaRecord()) fs = SchemaRecord()
-|      LOOKAHEAD(SchemaCollection()) fs = SchemaCollection()
-|      LOOKAHEAD(SchemaMap()) fs = SchemaMap()
-|      LOOKAHEAD(AtomSchema()) fs = AtomSchema()
+  LOOKAHEAD(3) fs = SchemaRecord()
+|      LOOKAHEAD(3) fs = SchemaCollection()
+|      LOOKAHEAD(3) fs = SchemaMap()
+|      fs = AtomSchema()
        )
        {
                return fs;
        }
 }
 
+Schema.ColumnSchema ProjectionColumnSchema() throws ParseException: 
+{
+       Token t1; 
+       Schema item = null; 
+       Schema.ColumnSchema fs = null; 
+}
+{
+  (
+       (
+  LOOKAHEAD(3) fs = SchemaRecord()
+|      LOOKAHEAD(3) fs = SchemaCollection()
+|      LOOKAHEAD(3) fs = SchemaMap()
+|      fs = AtomSchema()
+  )
+  { return fs; }
+|
+  { return null; }
+       )
+}
+
 Schema.ColumnSchema AtomSchema() throws ParseException : 
 {
        Token t1 = null;
@@ -143,10 +159,6 @@
                        fs = new Schema.ColumnSchema(t1.image, type); 
                        return fs;
                }
-|
-    {
-      return null;
-    }
        )
 }
 
@@ -171,7 +183,7 @@
        Schema.ColumnSchema fs;
 }
 { 
-       t1 = <IDENTIFIER> ":" <RECORD> "(" s = RecordSchemaInternal(null) ")" 
+       t1 = <IDENTIFIER> ":" <RECORD> "(" s = RecordSchemaInternal() ")" 
        {
                fs = new Schema.ColumnSchema(t1.image, s, ColumnType.RECORD);
                return fs;
@@ -196,12 +208,14 @@
        Schema.ColumnSchema fs = null;
 }
 {
-       (
-       LOOKAHEAD(RecordSchemaInternal(null)) s= RecordSchemaInternal(null)
-|
-       fs = AnonymousColumnSchema()
-       )
-       { if (s == null) s = new Schema(fs); fs = new Schema.ColumnSchema(id, 
s, ColumnType.COLLECTION); return fs; }
+  (
+  s= RecordSchemaInternal()
+| fs = AnonymousColumnSchema()
+  )
+       { if (s == null) s = new Schema(fs);
+    fs = new Schema.ColumnSchema(id, s, ColumnType.COLLECTION);
+    return fs;
+  }
 }
 
 Schema.ColumnSchema AnonymousColumnSchema() throws ParseException : 
@@ -212,10 +226,10 @@
 }
 {
        (
-       LOOKAHEAD(AnonymousSchemaRecord()) fs = AnonymousSchemaRecord()
-|      LOOKAHEAD(AnonymousSchemaCollection()) fs = AnonymousSchemaCollection()
-|      LOOKAHEAD(AnonymousSchemaMap()) fs = AnonymousSchemaMap()
-|      LOOKAHEAD(AnonymousAtomSchema()) fs = AnonymousAtomSchema()
+       fs = AnonymousSchemaRecord()
+|      fs = AnonymousSchemaCollection()
+|      fs = AnonymousSchemaMap()
+|      fs = AnonymousAtomSchema()
        )
        {
                return fs;
@@ -256,7 +270,7 @@
        Schema.ColumnSchema fs;
 }
 { 
-       <RECORD> "(" s = RecordSchemaInternal(null) ")" 
+       <RECORD> "(" s = RecordSchemaInternal() ")" 
        {
                fs = new Schema.ColumnSchema(null, s, ColumnType.RECORD);
                return fs;
@@ -269,56 +283,59 @@
        Schema.ColumnSchema fs;
 }
 { 
-       ( <COLLECTION> "(" s = RecordSchemaInternal(null) ")"  )
+       ( <COLLECTION> "(" fs = SchemaCollectionEntry(null) ")"  )
        {
+    s = new Schema(fs);
                fs = new Schema.ColumnSchema(null, s, ColumnType.COLLECTION);
                return fs;
        } 
 }
 
-Schema RecordSchemaInternal(Schema list) throws ParseException : 
+Schema RecordSchemaInternal() throws ParseException : 
+{
+  Schema list = new Schema(); 
+       Schema.ColumnSchema fs = null;
+}
+{
+  fs = ColumnSchema() {list.add(fs);} ( "," fs = ColumnSchema() 
{list.add(fs);})*
+       { return list; }
+}
+
+Schema RecordSchema(Schema list) throws ParseException : 
 {
-       Schema item = null; 
        if (list == null)
                list = new Schema(); 
        Schema.ColumnSchema fs = null;
 }
 {
        (
-       (
-               fs = ColumnSchema() {list.add(fs);}
-               ( "," fs = ColumnSchema() {list.add(fs);})*
+               fs = ColumnSchema() { if (fs != null && 
Projection.isVirtualColumn(fs.getName())) throw new 
ParseException("["+fs.getName()+"] is a reserved virtual column name"); 
list.add(fs);}
+               ( "," fs = ColumnSchema() { if (fs != null && 
Projection.isVirtualColumn(fs.getName())) throw new 
ParseException("["+fs.getName()+"] is a reserved virtual column name"); 
list.add(fs);})* <EOF>
        )       
-       )
-       { return list; }
+       { return (list.getNumColumns() == 0 || (list.getNumColumns() == 1 && 
list.getColumn(0) == null) ? null : list); }
 }
 
-Schema RecordSchema(Schema list, boolean virtualColAllowed) throws 
ParseException : 
+Schema ProjectionSchema(Schema list) throws ParseException : 
 {
-       Schema item = null; 
        if (list == null)
                list = new Schema(); 
        Schema.ColumnSchema fs = null;
 }
 {
        (
-       (
-               fs = ColumnSchema() { if (!virtualColAllowed && fs != null && 
Projection.isVirtualColumn(fs.getName())) throw new 
ParseException("["+fs.getName()+"] is a reserved virtual column name"); 
list.add(fs);}
-               ( "," fs = ColumnSchema() { if (!virtualColAllowed && fs != 
null && Projection.isVirtualColumn(fs.getName())) throw new 
ParseException("["+fs.getName()+"] is a reserved virtual column name"); 
list.add(fs);})* <EOF>
+               fs = ProjectionColumnSchema() { list.add(fs);}
+               ( "," fs = ProjectionColumnSchema() { list.add(fs);})* <EOF>
        )       
-       )
        { return (list.getNumColumns() == 0 || (list.getNumColumns() == 1 && 
list.getColumn(0) == null) ? null : list); }
 }
 
 Schema MapSchema() throws ParseException : 
 {
-       Schema item = null; 
        Schema list = new Schema(); 
        Schema.ColumnSchema fs = null;
 }
 {
        (
-       LOOKAHEAD(3)
        (
                "(" fs = AnonymousColumnSchema() ")"
        )

Added: 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java?rev=884353&view=auto
==============================================================================
--- 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java
 (added)
+++ 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java
 Thu Nov 26 01:08:59 2009
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.types;
+
+import java.io.StringReader;
+import junit.framework.Assert;
+
+import org.apache.hadoop.zebra.schema.ColumnType;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.parser.TableSchemaParser;
+import org.apache.hadoop.zebra.schema.Schema.ColumnSchema;
+import org.junit.Test;
+
+public class TestSchemaAnonymousCollection {
+  @Test
+  public void testSchemaValid1() throws ParseException {
+    String strSch = "c1:collection(f1:int, f2:int), 
c2:collection(collection(record(f3:float, f4)))";
+    TableSchemaParser parser;
+    Schema schema;
+
+    parser = new TableSchemaParser(new StringReader(strSch));
+    schema = parser.RecordSchema(null);
+    System.out.println(schema);
+
+    // test 1st level schema;
+    ColumnSchema f1 = schema.getColumn(0);
+    Assert.assertEquals("c1", f1.getName());
+    Assert.assertEquals(ColumnType.COLLECTION, f1.getType());
+
+    ColumnSchema f2 = schema.getColumn(1);
+    Assert.assertEquals("c2", f2.getName());
+    Assert.assertEquals(ColumnType.COLLECTION, f2.getType());
+
+    // test 2nd level schema;
+    Schema f1Schema = f1.getSchema();
+    ColumnSchema f11 = f1Schema.getColumn(0);
+    Assert.assertEquals("f1", f11.getName());
+    Assert.assertEquals(ColumnType.INT, f11.getType());
+    ColumnSchema f12 = f1Schema.getColumn(1);
+    Assert.assertEquals("f2", f12.getName());
+    Assert.assertEquals(ColumnType.INT, f12.getType());
+
+    Schema f2Schema = f2.getSchema();
+    ColumnSchema f21 = f2Schema.getColumn(0);
+    Assert.assertNull(f21.getName());
+    Assert.assertEquals(ColumnType.COLLECTION, f21.getType());
+
+    // test 3rd level schema;
+    Schema f21Schema = f21.getSchema();
+    ColumnSchema f211 = f21Schema.getColumn(0);
+    Assert.assertNull(f211.getName());
+    Assert.assertEquals(ColumnType.COLLECTION, f211.getType());
+    Schema f211Schema = f211.getSchema();
+    
+    ColumnSchema f212 = f211Schema.getColumn(0);
+    Assert.assertNull(f212.getName());
+    Assert.assertEquals(ColumnType.RECORD, f212.getType());
+    Schema f212Schema = f212.getSchema();
+    ColumnSchema f213 = f212Schema.getColumn(0);
+    Assert.assertEquals("f3", f213.getName());
+    Assert.assertEquals(ColumnType.FLOAT, f213.getType());
+    ColumnSchema f214 = f212Schema.getColumn(1);
+    Assert.assertEquals("f4", f214.getName());
+    Assert.assertEquals(ColumnType.BYTES, f214.getType());
+  }
+}
\ No newline at end of file

Modified: 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java?rev=884353&r1=884352&r2=884353&view=diff
==============================================================================
--- 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java
 (original)
+++ 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java
 Thu Nov 26 01:08:59 2009
@@ -150,7 +150,7 @@
       System.out.println(schema);
     } catch (Exception e) {
       String errMsg = e.getMessage();
-      String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+      String str = "Encountered \"<EOF>\" at line 1, column 10.";
       System.out.println(errMsg);
       System.out.println(str);
       Assert.assertEquals(errMsg.startsWith(str), true);
@@ -169,7 +169,7 @@
       System.out.println(schema);
     } catch (Exception e) {
       String errMsg = e.getMessage();
-      String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+      String str = "Encountered \" \",\" \", \"\" at line 1, column 11.";
       System.out.println(errMsg);
       System.out.println(str);
       Assert.assertEquals(errMsg.startsWith(str), true);
@@ -188,7 +188,7 @@
       System.out.println(schema);
     } catch (Exception e) {
       String errMsg = e.getMessage();
-      String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+      String str = "Encountered \" <IDENTIFIER> \"m2 \"\" at line 1, column 
8.";
       System.out.println(errMsg);
       System.out.println(str);
       Assert.assertEquals(errMsg.startsWith(str), true);
@@ -207,7 +207,7 @@
       System.out.println(schema);
     } catch (Exception e) {
       String errMsg = e.getMessage();
-      String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+      String str = "Encountered \" <IDENTIFIER> \"abc \"\" at line 1, column 
8.";
       System.out.println(errMsg);
       System.out.println(str);
       Assert.assertEquals(errMsg.startsWith(str), true);

svn commit: r884353 - in /hadoop/pig/branches/branch-0.6/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/schema/ src/test/org/apache/hadoop/zebra/types/

Reply via email to