Author: daijy
Date: Thu Mar 11 18:37:11 2010
New Revision: 921975
URL: http://svn.apache.org/viewvc?rev=921975&view=rev
Log:
PIG-1275: empty bag in PigStorage read as null
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java
hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=921975&r1=921974&r2=921975&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu Mar 11 18:37:11 2010
@@ -147,6 +147,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-1275: empty bag in PigStorage read as null (daijy)
+
PIG-1252: Diamond splitter does not generate correct results when using
Multi-query optimization (rding)
Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java?rev=921975&r1=921974&r2=921975&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java Thu Mar 11 18:37:11 2010
@@ -20,6 +20,7 @@ package org.apache.pig.builtin;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.io.PushbackInputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@@ -67,7 +68,7 @@ public class Utf8StorageConverter implem
}
}
- private DataBag consumeBag(ByteArrayInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
+ private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
if (fieldSchema==null) {
throw new IOException("Schema is null");
}
@@ -85,7 +86,8 @@ public class Utf8StorageConverter implem
DataBag db = DefaultBagFactory.getInstance().newDefaultBag();
while (true) {
t = consumeTuple(in, fs);
- db.add(t);
+ if (t!=null)
+ db.add(t);
while ((buf=in.read())!='}'&&buf!=',') {
if (buf==-1) {
throw new IOException("Unexpect end of bag");
@@ -97,17 +99,21 @@ public class Utf8StorageConverter implem
return db;
}
- private Tuple consumeTuple(ByteArrayInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
+ private Tuple consumeTuple(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
if (fieldSchema==null) {
throw new IOException("Schema is null");
}
int buf;
ByteArrayOutputStream mOut;
- while ((buf=in.read())!='(') {
+ while ((buf=in.read())!='('||buf=='}') {
if (buf==-1) {
throw new IOException("Unexpect end of tuple");
}
+ if (buf=='}') {
+ in.unread(buf);
+ return null;
+ }
}
Tuple t = DefaultTupleFactory.getInstance().newTuple();
if (fieldSchema.getSchema()!=null &&
fieldSchema.getSchema().getFields().length!=0) {
@@ -172,7 +178,7 @@ public class Utf8StorageConverter implem
return t;
}
- private Map<String, Object> consumeMap(ByteArrayInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
+ private Map<String, Object> consumeMap(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
if (fieldSchema==null) {
throw new IOException("Schema is null");
}
@@ -232,7 +238,7 @@ public class Utf8StorageConverter implem
return m;
}
- private Object consumeComplexType(ByteArrayInputStream in, ResourceFieldSchema complexFieldSchema) throws IOException {
+ private Object consumeComplexType(PushbackInputStream in, ResourceFieldSchema complexFieldSchema) throws IOException {
Object field;
switch (complexFieldSchema.getType()) {
case DataType.BAG:
@@ -285,7 +291,8 @@ public class Utf8StorageConverter implem
return null;
DataBag db;
try {
- ByteArrayInputStream in = new ByteArrayInputStream(b);
+ ByteArrayInputStream bis = new ByteArrayInputStream(b);
+ PushbackInputStream in = new PushbackInputStream(bis);
db = consumeBag(in, schema);
} catch (IOException e) {
LogUtils.warn(this, "Unable to interpret value " +
Arrays.toString(b) + " in field being " +
@@ -424,7 +431,8 @@ public class Utf8StorageConverter implem
ResourceFieldSchema fs = new ResourceFieldSchema();
fs.setType(DataType.MAP);
try {
- ByteArrayInputStream in = new ByteArrayInputStream(b);
+ ByteArrayInputStream bis = new ByteArrayInputStream(b);
+ PushbackInputStream in = new PushbackInputStream(bis);
map = consumeMap(in, fs);
}
catch (IOException e) {
@@ -443,7 +451,8 @@ public class Utf8StorageConverter implem
Tuple t;
try {
- ByteArrayInputStream in = new ByteArrayInputStream(b);
+ ByteArrayInputStream bis = new ByteArrayInputStream(b);
+ PushbackInputStream in = new PushbackInputStream(bis);
t = consumeTuple(in, fieldSchema);
}
catch (IOException e) {
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java?rev=921975&r1=921974&r2=921975&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java Thu Mar 11 18:37:11 2010
@@ -216,5 +216,14 @@ public class TestTextDataParser extends
expectedTuple.set(1, "b");
expectedBag.add(expectedTuple);
assertTrue(b.equals(expectedBag));
+ }
+
+ @Test
+ public void testEmptyBag() throws Exception{
+ String myBag = "{}";
+ Object o = ps.getLoadCaster().bytesToBag(myBag.getBytes(), getBagFieldSchema());
+ assertTrue(o instanceof DataBag);
+ DataBag b = (DataBag)o;
+ assertTrue(b.size()==0);
}
}