Author: yanz
Date: Wed Dec 23 01:21:08 2009
New Revision: 893373

URL: http://svn.apache.org/viewvc?rev=893373&view=rev
Log:
PIG-1153: Record split exception fix (yanz)
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=893373&r1=893372&r2=893373&view=diff ============================================================================== --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Wed Dec 23 01:21:08 2009 @@ -48,6 +48,8 @@ BUG FIXES + PIG-1153: Record split exception fix (yanz) + PIG-1145: Merge Join on Large Table throws an EOF exception (yanz) PIG-1095: Schema support of anonymous fields in COLECTION fails (yanz via Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java?rev=893373&r1=893372&r2=893373&view=diff ============================================================================== --- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java Wed Dec 23 01:21:08 2009 @@ -149,12 +149,11 @@ class PartitionFieldInfo { private HashSet<PartitionInfo.ColumnMappingEntry> mSplitMaps = new HashSet<ColumnMappingEntry>(); - private HashSet<String> mSplitColumns = new HashSet<String>(); private ColumnMappingEntry mCGIndex = null; private String mCGName = null; // fully qualified name private HashSet<String> keySet = null; private SplitType stype = SplitType.NONE; - private boolean splitChild; + private HashSet<String> splitChildren = new HashSet<String>(); /** * set a MAP key split (sub)column @@ 
-165,7 +164,6 @@ new Partition.PartitionInfo.ColumnMappingEntry( ri, fi, fs); mSplitMaps.add(cme); // multiple map splits on one MAP column is allowed! - mSplitColumns.add(name); if (keySet == null) keySet = new HashSet<String>(); return cme.addKeys(keys, keySet); @@ -196,22 +194,19 @@ if (st == stype) { // multiple MAP splits of a field and its children on different keys are ok - if (st == SplitType.MAP || cst == SplitType.MAP || splitChild == this.splitChild) + if (st == SplitType.MAP || cst == SplitType.MAP) return; } - if (stype != SplitType.NONE) { - if (childName != null) - name = name + "." + childName; - throw new ParseException("Different Split Types Set on the same field: " + name); + if (splitChild) + { + if (stype != SplitType.NONE && splitChildren.isEmpty()) + throw new ParseException("Split on "+name+" is set at different levels."); + splitChildren.add(childName); + } else { + if (splitChildren.contains(childName)) + throw new ParseException("Split on "+name+" is set at different levels."); } stype = st; - this.splitChild = splitChild; - if (mSplitColumns.contains(name)) { - if (childName != null) - name = name + "." + childName; - throw new ParseException("Split on "+name+" are set more than once"); - } - mSplitColumns.add(name); } /* Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java?rev=893373&r1=893372&r2=893373&view=diff ============================================================================== --- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java Wed Dec 23 01:21:08 2009 @@ -94,6 +94,33 @@ } } +//Negative test case. For record split, we should not try to store same + // record field on different column groups. 
+ @Test + public void testWriteRecord6() throws IOException, ParseException { + String STR_SCHEMA = "r1:record(f1:int, f2:long), r2:record(r3:record(f3:float, f4))"; + String STR_STORAGE = "[r1.f1]; [r1.f2, r2.r3.f3]; [r2.r3]"; + conf = new Configuration(); + conf.setInt("table.output.tfile.minBlock.size", 64 * 1024); + conf.setInt("table.input.split.minSize", 64 * 1024); + conf.set("table.output.tfile.compression", "none"); + + RawLocalFileSystem rawLFS = new RawLocalFileSystem(); + fs = new LocalFileSystem(rawLFS); + path = new Path(fs.getWorkingDirectory(), this.getClass().getSimpleName()); + fs = path.getFileSystem(conf); + // drop any previous tables + BasicTable.drop(path, conf); + // Build Table and column groups + BasicTable.Writer writer = null; + try { + writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf); + Assert.fail("Should throw exception"); + } catch (Exception e) { + System.out.println(e); + } + } + // Negative test case. map storage syntax is wrong @Test public void testWriteMap1() throws IOException, ParseException { Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java?rev=893373&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java (added) +++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java Wed Dec 23 01:21:08 2009 @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.zebra.types; + +import java.io.StringReader; +import java.util.Iterator; +import java.util.Map; +import java.util.HashSet; +import junit.framework.Assert; + +import org.apache.hadoop.zebra.types.CGSchema; +import org.apache.hadoop.zebra.schema.ColumnType; +import org.apache.hadoop.zebra.parser.ParseException; +import org.apache.hadoop.zebra.types.Partition; +import org.apache.hadoop.zebra.schema.Schema; +import org.apache.hadoop.zebra.parser.TableSchemaParser; +import org.apache.hadoop.zebra.schema.Schema.ColumnSchema; +import org.junit.Before; +import org.junit.Test; + +public class TestStorageRecord2 { + String strSch = "r1:record(f1:int, f2:int), r2:record(f5:int, r3:record(f3:float, f4))"; + TableSchemaParser parser; + Schema schema; + + @Before + public void init() throws ParseException { + parser = new TableSchemaParser(new StringReader(strSch)); + schema = parser.RecordSchema(null); + } + + @Test + public void testStorageValid1() { + try { + String strStorage = "[r1.f1, r2.r3.f3, r2.f5]; [r1.f2, r2.r3.f4]"; + Partition p = new Partition(schema.toString(), strStorage, null); + CGSchema[] cgschemas = p.getCGSchemas(); + + // 2 column group; + int size = cgschemas.length; + Assert.assertEquals(size, 2); + System.out.println("********** Column Groups **********"); + for (int i = 0; i < cgschemas.length; i++) { + System.out.println(cgschemas[i]); + 
System.out.println("--------------------------------"); + } + CGSchema cgs1 = cgschemas[0]; + CGSchema cgs2 = cgschemas[1]; + + ColumnSchema f11 = cgs1.getSchema().getColumn(0); + Assert.assertEquals("r1.f1", f11.getName()); + Assert.assertEquals(ColumnType.INT, f11.getType()); + ColumnSchema f12 = cgs1.getSchema().getColumn(1); + Assert.assertEquals("r2.r3.f3", f12.getName()); + Assert.assertEquals(ColumnType.FLOAT, f12.getType()); + + ColumnSchema f21 = cgs2.getSchema().getColumn(0); + Assert.assertEquals("r1.f2", f21.getName()); + Assert.assertEquals(ColumnType.INT, f21.getType()); + ColumnSchema f22 = cgs2.getSchema().getColumn(1); + Assert.assertEquals("r2.r3.f4", f22.getName()); + Assert.assertEquals(ColumnType.BYTES, f22.getType()); + + System.out.println("*********** Column Map **********"); + Map<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>> colmap = p + .getPartitionInfo().getColMap(); + Assert.assertEquals(colmap.size(), 5); + Iterator<Map.Entry<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>>> it = colmap + .entrySet().iterator(); + for (int i = 0; i < colmap.size(); i++) { + Map.Entry<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>> entry = (Map.Entry<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>>) it + .next(); + String name = entry.getKey(); + HashSet<Partition.PartitionInfo.ColumnMappingEntry> hs = entry + .getValue(); + Iterator<Partition.PartitionInfo.ColumnMappingEntry> it1 = hs + .iterator(); + for (int j = 0; j < hs.size(); j++) { + Partition.PartitionInfo.ColumnMappingEntry cme = (Partition.PartitionInfo.ColumnMappingEntry) it1 + .next(); + System.out.println("[Column = " + name + " CG = " + cme.getCGIndex() + + "." 
+ cme.getFieldIndex() + "]"); + if (i == 0 && j == 0) { + Assert.assertEquals(name, "r2.f5"); + Assert.assertEquals(cme.getCGIndex(), 0); + Assert.assertEquals(cme.getFieldIndex(), 2); + } else if (i == 1 && j == 0) { + Assert.assertEquals(name, "r1.f1"); + Assert.assertEquals(cme.getCGIndex(), 0); + Assert.assertEquals(cme.getFieldIndex(), 0); + } else if (i == 2 && j == 0) { + Assert.assertEquals(name, "r1.f2"); + Assert.assertEquals(cme.getCGIndex(), 1); + Assert.assertEquals(cme.getFieldIndex(), 0); + } else if (i == 3 && j == 0) { + Assert.assertEquals(name, "r2.r3.f3"); + Assert.assertEquals(cme.getCGIndex(), 0); + Assert.assertEquals(cme.getFieldIndex(), 1); + } else if (i == 4 && j == 0) { + Assert.assertEquals(name, "r2.r3.f4"); + Assert.assertEquals(cme.getCGIndex(), 1); + Assert.assertEquals(cme.getFieldIndex(), 1); + } + } + } + } catch (Exception e) { + Assert.assertTrue(false); + } + } + + /* + * @Test public void testStorageInvalid1() { try { String strStorage = + * "m1#k1"; TableStorageParser parser = new TableStorageParser(new + * ByteArrayInputStream(strStorage.getBytes("UTF-8")), null, schema); + * ArrayList<CGSchema> schemas = parser.StorageSchema(); CGSchema cgs1 = + * schemas.get(0); } catch (Exception e) { String errMsg = e.getMessage(); + * String str = "Encountered \" <IDENTIFIER> \"m1 \"\" at line 1, column 1."; + * System.out.println(errMsg); System.out.println(str); + * Assert.assertEquals(errMsg.startsWith(str), true); } } + */ +}