TestStorageRecord2.java

yanz Tue, 22 Dec 2009 17:21:36 -0800

Author: yanz
Date: Wed Dec 23 01:21:08 2009
New Revision: 893373

URL: http://svn.apache.org/viewvc?rev=893373&view=rev
Log:
PIG-1153: Record split exception fix (yanz)


Added:
    
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java
Modified:
    hadoop/pig/trunk/contrib/zebra/CHANGES.txt
    
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java
    
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java

Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=893373&r1=893372&r2=893373&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Wed Dec 23 01:21:08 2009
@@ -48,6 +48,8 @@
 
   BUG FIXES
 
+    PIG-1153: Record split exception fix (yanz)
+
     PIG-1145: Merge Join on Large Table throws an EOF exception (yanz)
 
     PIG-1095: Schema support of anonymous fields in COLECTION fails (yanz via

Modified: 
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java?rev=893373&r1=893372&r2=893373&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java Wed Dec 23 01:21:08 2009
@@ -149,12 +149,11 @@
     class PartitionFieldInfo {
       private HashSet<PartitionInfo.ColumnMappingEntry> mSplitMaps =
           new HashSet<ColumnMappingEntry>();
-      private HashSet<String> mSplitColumns = new HashSet<String>();
       private ColumnMappingEntry mCGIndex = null;
       private String mCGName = null; // fully qualified name
       private HashSet<String> keySet = null;
       private SplitType stype = SplitType.NONE;
-      private boolean splitChild;
+      private HashSet<String> splitChildren = new HashSet<String>();
 
       /**
        * set a MAP key split (sub)column
@@ -165,7 +164,6 @@
               new Partition.PartitionInfo.ColumnMappingEntry( ri, fi, fs);
         mSplitMaps.add(cme);
         // multiple map splits on one MAP column is allowed!
-        mSplitColumns.add(name);
         if (keySet == null)
           keySet = new HashSet<String>();
         return cme.addKeys(keys, keySet);
@@ -196,22 +194,19 @@
         if (st == stype)
         {
           // multiple MAP splits of a field and its children on different keys are ok
-          if (st == SplitType.MAP || cst == SplitType.MAP || splitChild == this.splitChild)
+          if (st == SplitType.MAP || cst == SplitType.MAP)
             return;
         }
-        if (stype != SplitType.NONE) {
-          if (childName != null)
-            name = name + "." + childName;
-          throw new ParseException("Different Split Types Set on the same field: " + name);
+        if (splitChild)
+        {
+          if (stype != SplitType.NONE && splitChildren.isEmpty())
+            throw new ParseException("Split on "+name+" is set at different levels.");
+          splitChildren.add(childName);
+        } else {
+          if (splitChildren.contains(childName))
+            throw new ParseException("Split on "+name+" is set at different levels.");
         }
         stype = st;
-        this.splitChild = splitChild;
-        if (mSplitColumns.contains(name)) {
-          if (childName != null)
-            name = name + "." + childName;
-          throw new ParseException("Split on "+name+" are set more than once");
-        }
-        mSplitColumns.add(name);
       }
 
       /*

Modified: 
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java?rev=893373&r1=893372&r2=893373&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java Wed Dec 23 01:21:08 2009
@@ -94,6 +94,33 @@
     }
   }
 
+//Negative test case. For record split, we should not try to store same
+  // record field on different column groups.
+  @Test
+  public void testWriteRecord6() throws IOException, ParseException {
+    String STR_SCHEMA = "r1:record(f1:int, f2:long), r2:record(r3:record(f3:float, f4))";
+    String STR_STORAGE = "[r1.f1]; [r1.f2, r2.r3.f3]; [r2.r3]";
+    conf = new Configuration();
+    conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+    conf.setInt("table.input.split.minSize", 64 * 1024);
+    conf.set("table.output.tfile.compression", "none");
+
+    RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+    fs = new LocalFileSystem(rawLFS);
+    path = new Path(fs.getWorkingDirectory(), this.getClass().getSimpleName());
+    fs = path.getFileSystem(conf);
+    // drop any previous tables
+    BasicTable.drop(path, conf);
+    // Build Table and column groups
+    BasicTable.Writer writer = null;
+    try {
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
+      Assert.fail("Should throw exception");
+    } catch (Exception e) {
+      System.out.println(e);
+    }
+  }
+  
   // Negative test case. map storage syntax is wrong
   @Test
   public void testWriteMap1() throws IOException, ParseException {

Added: 
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java?rev=893373&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java Wed Dec 23 01:21:08 2009
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.types;
+
+import java.io.StringReader;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.HashSet;
+import junit.framework.Assert;
+
+import org.apache.hadoop.zebra.types.CGSchema;
+import org.apache.hadoop.zebra.schema.ColumnType;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.types.Partition;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.parser.TableSchemaParser;
+import org.apache.hadoop.zebra.schema.Schema.ColumnSchema;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestStorageRecord2 {
+  String strSch = "r1:record(f1:int, f2:int), r2:record(f5:int, r3:record(f3:float, f4))";
+  TableSchemaParser parser;
+  Schema schema;
+
+  @Before
+  public void init() throws ParseException {
+    parser = new TableSchemaParser(new StringReader(strSch));
+    schema = parser.RecordSchema(null);
+  }
+
+  @Test
+  public void testStorageValid1() {
+    try {
+      String strStorage = "[r1.f1, r2.r3.f3, r2.f5]; [r1.f2, r2.r3.f4]";
+      Partition p = new Partition(schema.toString(), strStorage, null);
+      CGSchema[] cgschemas = p.getCGSchemas();
+
+      // 2 column group;
+      int size = cgschemas.length;
+      Assert.assertEquals(size, 2);
+      System.out.println("********** Column Groups **********");
+      for (int i = 0; i < cgschemas.length; i++) {
+        System.out.println(cgschemas[i]);
+        System.out.println("--------------------------------");
+      }
+      CGSchema cgs1 = cgschemas[0];
+      CGSchema cgs2 = cgschemas[1];
+
+      ColumnSchema f11 = cgs1.getSchema().getColumn(0);
+      Assert.assertEquals("r1.f1", f11.getName());
+      Assert.assertEquals(ColumnType.INT, f11.getType());
+      ColumnSchema f12 = cgs1.getSchema().getColumn(1);
+      Assert.assertEquals("r2.r3.f3", f12.getName());
+      Assert.assertEquals(ColumnType.FLOAT, f12.getType());
+
+      ColumnSchema f21 = cgs2.getSchema().getColumn(0);
+      Assert.assertEquals("r1.f2", f21.getName());
+      Assert.assertEquals(ColumnType.INT, f21.getType());
+      ColumnSchema f22 = cgs2.getSchema().getColumn(1);
+      Assert.assertEquals("r2.r3.f4", f22.getName());
+      Assert.assertEquals(ColumnType.BYTES, f22.getType());
+
+      System.out.println("*********** Column Map **********");
+      Map<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>> colmap = p
+          .getPartitionInfo().getColMap();
+      Assert.assertEquals(colmap.size(), 5);
+      Iterator<Map.Entry<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>>> it = colmap
+          .entrySet().iterator();
+      for (int i = 0; i < colmap.size(); i++) {
+        Map.Entry<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>> entry = (Map.Entry<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>>) it
+            .next();
+        String name = entry.getKey();
+        HashSet<Partition.PartitionInfo.ColumnMappingEntry> hs = entry
+            .getValue();
+        Iterator<Partition.PartitionInfo.ColumnMappingEntry> it1 = hs
+            .iterator();
+        for (int j = 0; j < hs.size(); j++) {
+          Partition.PartitionInfo.ColumnMappingEntry cme = (Partition.PartitionInfo.ColumnMappingEntry) it1
+              .next();
+          System.out.println("[Column = " + name + " CG = " + cme.getCGIndex()
+              + "." + cme.getFieldIndex() + "]");
+          if (i == 0 && j == 0) {
+            Assert.assertEquals(name, "r2.f5");
+            Assert.assertEquals(cme.getCGIndex(), 0);
+            Assert.assertEquals(cme.getFieldIndex(), 2);
+          } else if (i == 1 && j == 0) {
+            Assert.assertEquals(name, "r1.f1");
+            Assert.assertEquals(cme.getCGIndex(), 0);
+            Assert.assertEquals(cme.getFieldIndex(), 0);
+          } else if (i == 2 && j == 0) {
+            Assert.assertEquals(name, "r1.f2");
+            Assert.assertEquals(cme.getCGIndex(), 1);
+            Assert.assertEquals(cme.getFieldIndex(), 0);
+          } else if (i == 3 && j == 0) {
+            Assert.assertEquals(name, "r2.r3.f3");
+            Assert.assertEquals(cme.getCGIndex(), 0);
+            Assert.assertEquals(cme.getFieldIndex(), 1);
+          } else if (i == 4 && j == 0) {
+            Assert.assertEquals(name, "r2.r3.f4");
+            Assert.assertEquals(cme.getCGIndex(), 1);
+            Assert.assertEquals(cme.getFieldIndex(), 1);
+          }
+        }
+      }
+    } catch (Exception e) {
+      Assert.assertTrue(false);
+    }
+  }
+
+  /*
+   * @Test public void testStorageInvalid1() { try { String strStorage =
+   * "m1#k1"; TableStorageParser parser = new TableStorageParser(new
+   * ByteArrayInputStream(strStorage.getBytes("UTF-8")), null, schema);
+   * ArrayList<CGSchema> schemas = parser.StorageSchema(); CGSchema cgs1 =
+   * schemas.get(0); } catch (Exception e) { String errMsg = e.getMessage();
+   * String str = "Encountered \" <IDENTIFIER> \"m1 \"\" at line 1, column 1.";
+   * System.out.println(errMsg); System.out.println(str);
+   * Assert.assertEquals(errMsg.startsWith(str), true); } }
+   */
+}

svn commit: r893373 - in /hadoop/pig/trunk/contrib/zebra: CHANGES.txt src/java/org/apache/hadoop/zebra/types/Partition.java src/test/org/apache/hadoop/zebra/io/TestNegative.java src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java

Reply via email to