Author: yanz
Date: Wed Dec 23 01:21:08 2009
New Revision: 893373
URL: http://svn.apache.org/viewvc?rev=893373&view=rev
Log:
PIG-1153: Record split exception fix (yanz)
Added:
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java
Modified:
hadoop/pig/trunk/contrib/zebra/CHANGES.txt
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=893373&r1=893372&r2=893373&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Wed Dec 23 01:21:08 2009
@@ -48,6 +48,8 @@
BUG FIXES
+ PIG-1153: Record split exception fix (yanz)
+
PIG-1145: Merge Join on Large Table throws an EOF exception (yanz)
PIG-1095: Schema support of anonymous fields in COLECTION fails (yanz via
Modified:
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java?rev=893373&r1=893372&r2=893373&view=diff
==============================================================================
---
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java
(original)
+++
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java
Wed Dec 23 01:21:08 2009
@@ -149,12 +149,11 @@
class PartitionFieldInfo {
private HashSet<PartitionInfo.ColumnMappingEntry> mSplitMaps =
new HashSet<ColumnMappingEntry>();
- private HashSet<String> mSplitColumns = new HashSet<String>();
private ColumnMappingEntry mCGIndex = null;
private String mCGName = null; // fully qualified name
private HashSet<String> keySet = null;
private SplitType stype = SplitType.NONE;
- private boolean splitChild;
+ private HashSet<String> splitChildren = new HashSet<String>();
/**
* set a MAP key split (sub)column
@@ -165,7 +164,6 @@
new Partition.PartitionInfo.ColumnMappingEntry( ri, fi, fs);
mSplitMaps.add(cme);
// multiple map splits on one MAP column is allowed!
- mSplitColumns.add(name);
if (keySet == null)
keySet = new HashSet<String>();
return cme.addKeys(keys, keySet);
@@ -196,22 +194,19 @@
if (st == stype)
{
// multiple MAP splits of a field and its children on different keys
are ok
- if (st == SplitType.MAP || cst == SplitType.MAP || splitChild ==
this.splitChild)
+ if (st == SplitType.MAP || cst == SplitType.MAP)
return;
}
- if (stype != SplitType.NONE) {
- if (childName != null)
- name = name + "." + childName;
- throw new ParseException("Different Split Types Set on the same
field: " + name);
+ if (splitChild)
+ {
+ if (stype != SplitType.NONE && splitChildren.isEmpty())
+ throw new ParseException("Split on "+name+" is set at different
levels.");
+ splitChildren.add(childName);
+ } else {
+ if (splitChildren.contains(childName))
+ throw new ParseException("Split on "+name+" is set at different
levels.");
}
stype = st;
- this.splitChild = splitChild;
- if (mSplitColumns.contains(name)) {
- if (childName != null)
- name = name + "." + childName;
- throw new ParseException("Split on "+name+" are set more than once");
- }
- mSplitColumns.add(name);
}
/*
Modified:
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java?rev=893373&r1=893372&r2=893373&view=diff
==============================================================================
---
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
(original)
+++
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
Wed Dec 23 01:21:08 2009
@@ -94,6 +94,33 @@
}
}
+  // Negative test case. For record split, we should not try to store the same
+  // record field in different column groups.
+ @Test
+ public void testWriteRecord6() throws IOException, ParseException {
+ String STR_SCHEMA = "r1:record(f1:int, f2:long),
r2:record(r3:record(f3:float, f4))";
+ String STR_STORAGE = "[r1.f1]; [r1.f2, r2.r3.f3]; [r2.r3]";
+ conf = new Configuration();
+ conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+ conf.setInt("table.input.split.minSize", 64 * 1024);
+ conf.set("table.output.tfile.compression", "none");
+
+ RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+ fs = new LocalFileSystem(rawLFS);
+ path = new Path(fs.getWorkingDirectory(), this.getClass().getSimpleName());
+ fs = path.getFileSystem(conf);
+ // drop any previous tables
+ BasicTable.drop(path, conf);
+ // Build Table and column groups
+ BasicTable.Writer writer = null;
+ try {
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
+ Assert.fail("Should throw exception");
+ } catch (Exception e) {
+ System.out.println(e);
+ }
+ }
+
// Negative test case. map storage syntax is wrong
@Test
public void testWriteMap1() throws IOException, ParseException {
Added:
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java?rev=893373&view=auto
==============================================================================
---
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java
(added)
+++
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java
Wed Dec 23 01:21:08 2009
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.types;
+
+import java.io.StringReader;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.HashSet;
+import junit.framework.Assert;
+
+import org.apache.hadoop.zebra.types.CGSchema;
+import org.apache.hadoop.zebra.schema.ColumnType;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.types.Partition;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.parser.TableSchemaParser;
+import org.apache.hadoop.zebra.schema.Schema.ColumnSchema;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestStorageRecord2 {
+ String strSch = "r1:record(f1:int, f2:int), r2:record(f5:int,
r3:record(f3:float, f4))";
+ TableSchemaParser parser;
+ Schema schema;
+
+ @Before
+ public void init() throws ParseException {
+ parser = new TableSchemaParser(new StringReader(strSch));
+ schema = parser.RecordSchema(null);
+ }
+
+ @Test
+ public void testStorageValid1() {
+ try {
+ String strStorage = "[r1.f1, r2.r3.f3, r2.f5]; [r1.f2, r2.r3.f4]";
+ Partition p = new Partition(schema.toString(), strStorage, null);
+ CGSchema[] cgschemas = p.getCGSchemas();
+
+ // 2 column group;
+ int size = cgschemas.length;
+ Assert.assertEquals(size, 2);
+ System.out.println("********** Column Groups **********");
+ for (int i = 0; i < cgschemas.length; i++) {
+ System.out.println(cgschemas[i]);
+ System.out.println("--------------------------------");
+ }
+ CGSchema cgs1 = cgschemas[0];
+ CGSchema cgs2 = cgschemas[1];
+
+ ColumnSchema f11 = cgs1.getSchema().getColumn(0);
+ Assert.assertEquals("r1.f1", f11.getName());
+ Assert.assertEquals(ColumnType.INT, f11.getType());
+ ColumnSchema f12 = cgs1.getSchema().getColumn(1);
+ Assert.assertEquals("r2.r3.f3", f12.getName());
+ Assert.assertEquals(ColumnType.FLOAT, f12.getType());
+
+ ColumnSchema f21 = cgs2.getSchema().getColumn(0);
+ Assert.assertEquals("r1.f2", f21.getName());
+ Assert.assertEquals(ColumnType.INT, f21.getType());
+ ColumnSchema f22 = cgs2.getSchema().getColumn(1);
+ Assert.assertEquals("r2.r3.f4", f22.getName());
+ Assert.assertEquals(ColumnType.BYTES, f22.getType());
+
+ System.out.println("*********** Column Map **********");
+ Map<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>> colmap
= p
+ .getPartitionInfo().getColMap();
+ Assert.assertEquals(colmap.size(), 5);
+ Iterator<Map.Entry<String,
HashSet<Partition.PartitionInfo.ColumnMappingEntry>>> it = colmap
+ .entrySet().iterator();
+ for (int i = 0; i < colmap.size(); i++) {
+ Map.Entry<String, HashSet<Partition.PartitionInfo.ColumnMappingEntry>>
entry = (Map.Entry<String,
HashSet<Partition.PartitionInfo.ColumnMappingEntry>>) it
+ .next();
+ String name = entry.getKey();
+ HashSet<Partition.PartitionInfo.ColumnMappingEntry> hs = entry
+ .getValue();
+ Iterator<Partition.PartitionInfo.ColumnMappingEntry> it1 = hs
+ .iterator();
+ for (int j = 0; j < hs.size(); j++) {
+ Partition.PartitionInfo.ColumnMappingEntry cme =
(Partition.PartitionInfo.ColumnMappingEntry) it1
+ .next();
+ System.out.println("[Column = " + name + " CG = " + cme.getCGIndex()
+ + "." + cme.getFieldIndex() + "]");
+ if (i == 0 && j == 0) {
+ Assert.assertEquals(name, "r2.f5");
+ Assert.assertEquals(cme.getCGIndex(), 0);
+ Assert.assertEquals(cme.getFieldIndex(), 2);
+ } else if (i == 1 && j == 0) {
+ Assert.assertEquals(name, "r1.f1");
+ Assert.assertEquals(cme.getCGIndex(), 0);
+ Assert.assertEquals(cme.getFieldIndex(), 0);
+ } else if (i == 2 && j == 0) {
+ Assert.assertEquals(name, "r1.f2");
+ Assert.assertEquals(cme.getCGIndex(), 1);
+ Assert.assertEquals(cme.getFieldIndex(), 0);
+ } else if (i == 3 && j == 0) {
+ Assert.assertEquals(name, "r2.r3.f3");
+ Assert.assertEquals(cme.getCGIndex(), 0);
+ Assert.assertEquals(cme.getFieldIndex(), 1);
+ } else if (i == 4 && j == 0) {
+ Assert.assertEquals(name, "r2.r3.f4");
+ Assert.assertEquals(cme.getCGIndex(), 1);
+ Assert.assertEquals(cme.getFieldIndex(), 1);
+ }
+ }
+ }
+ } catch (Exception e) {
+ Assert.assertTrue(false);
+ }
+ }
+
+ /*
+ * @Test public void testStorageInvalid1() { try { String strStorage =
+ * "m1#k1"; TableStorageParser parser = new TableStorageParser(new
+ * ByteArrayInputStream(strStorage.getBytes("UTF-8")), null, schema);
+ * ArrayList<CGSchema> schemas = parser.StorageSchema(); CGSchema cgs1 =
+ * schemas.get(0); } catch (Exception e) { String errMsg = e.getMessage();
+ * String str = "Encountered \" <IDENTIFIER> \"m1 \"\" at line 1, column 1.";
+ * System.out.println(errMsg); System.out.println(str);
+ * Assert.assertEquals(errMsg.startsWith(str), true); } }
+ */
+}