Author: daijy
Date: Tue Jan 6 01:52:33 2015
New Revision: 1649711
URL: http://svn.apache.org/r1649711
Log:
PIG-2949: JsonLoader only reads arrays of objects
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/builtin/JsonLoader.java
pig/trunk/test/org/apache/pig/test/TestJsonLoaderStorage.java
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1649711&r1=1649710&r2=1649711&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jan 6 01:52:33 2015
@@ -30,6 +30,8 @@ PIG-4333: Split BigData tests into multi
BUG FIXES
+PIG-2949: JsonLoader only reads arrays of objects (eyal via daijy)
+
PIG-4213: CSVExcelStorage not quoting texts containing \r (CR) when storing
(alfonso.nishikawa via daijy)
PIG-2647: Split Combining drops splits with empty getLocations() (tmwoodruff
via daijy)
Modified: pig/trunk/src/org/apache/pig/builtin/JsonLoader.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/JsonLoader.java?rev=1649711&r1=1649710&r2=1649711&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/JsonLoader.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/JsonLoader.java Tue Jan 6 01:52:33
2015
@@ -54,10 +54,8 @@ import org.apache.pig.data.DataByteArray
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import org.apache.pig.impl.util.JarManager;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.impl.util.Utils;
-import org.apache.pig.parser.ParserException;
/**
* A loader for data stored using {@link JsonStorage}. This is not a generic
@@ -171,7 +169,7 @@ public class JsonLoader extends LoadFunc
return t;
}
- } catch (JsonParseException jpe) {
+ } catch (Exception jpe) {
warn("Bad record, returning null for " + val,
PigWarning.UDF_WARNING_1);
} finally {
p.close();
@@ -180,6 +178,52 @@ public class JsonLoader extends LoadFunc
return t;
}
+ private Object readPrimitive(JsonParser p, JsonToken tok,
ResourceFieldSchema field) throws IOException {
+
+ if (tok == JsonToken.VALUE_NULL) return null;
+
+ switch(field.getType()) {
+ // Read based on our expected type
+ case DataType.BOOLEAN:
+ return p.getBooleanValue();
+
+ case DataType.INTEGER:
+ return p.getIntValue();
+
+ case DataType.LONG:
+ return p.getLongValue();
+
+ case DataType.FLOAT:
+ return p.getFloatValue();
+
+ case DataType.DOUBLE:
+ return p.getDoubleValue();
+
+ case DataType.DATETIME:
+ DateTimeFormatter formatter =
ISODateTimeFormat.dateTimeParser();
+ return formatter.withOffsetParsed().parseDateTime(p.getText());
+
+ case DataType.BYTEARRAY:
+ byte[] b = p.getText().getBytes();
+ // Use the DBA constructor that copies the bytes so that we own
+ // the memory
+ return new DataByteArray(b, 0, b.length);
+
+ case DataType.CHARARRAY:
+ return p.getText();
+
+ case DataType.BIGINTEGER:
+ return p.getBigIntegerValue();
+
+ case DataType.BIGDECIMAL:
+ return new BigDecimal(p.getText());
+
+ default:
+ throw new IOException("Unknown type in input schema: " +
+ field.getType() );
+ }
+ }
+
private Object readField(JsonParser p,
ResourceFieldSchema field,
int fieldnum) throws IOException {
@@ -193,67 +237,14 @@ public class JsonLoader extends LoadFunc
// Check to see if this value was null
if (tok == JsonToken.VALUE_NULL) return null;
+
+ tok = p.nextToken();
// Read based on our expected type
switch (field.getType()) {
- case DataType.BOOLEAN:
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- return p.getBooleanValue();
-
- case DataType.INTEGER:
- // Read the field name
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- return p.getIntValue();
-
- case DataType.LONG:
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- return p.getLongValue();
-
- case DataType.FLOAT:
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- return p.getFloatValue();
-
- case DataType.DOUBLE:
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- return p.getDoubleValue();
-
- case DataType.DATETIME:
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- DateTimeFormatter formatter = ISODateTimeFormat.dateTimeParser();
- return formatter.withOffsetParsed().parseDateTime(p.getText());
-
- case DataType.BYTEARRAY:
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- byte[] b = p.getText().getBytes();
- // Use the DBA constructor that copies the bytes so that we own
- // the memory
- return new DataByteArray(b, 0, b.length);
-
- case DataType.CHARARRAY:
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- return p.getText();
-
- case DataType.BIGINTEGER:
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- return p.getBigIntegerValue();
-
- case DataType.BIGDECIMAL:
- tok = p.nextToken();
- if (tok == JsonToken.VALUE_NULL) return null;
- return new BigDecimal(p.getText());
-
case DataType.MAP:
// Should be a start of the map object
- if (p.nextToken() != JsonToken.START_OBJECT) {
+ if (tok != JsonToken.START_OBJECT) {
warn("Bad map field, could not find start of object, field "
+ fieldnum, PigWarning.UDF_WARNING_1);
return null;
@@ -267,7 +258,7 @@ public class JsonLoader extends LoadFunc
return m;
case DataType.TUPLE:
- if (p.nextToken() != JsonToken.START_OBJECT) {
+ if (tok != JsonToken.START_OBJECT) {
warn("Bad tuple field, could not find start of object, "
+ "field " + fieldnum, PigWarning.UDF_WARNING_1);
return null;
@@ -289,7 +280,7 @@ public class JsonLoader extends LoadFunc
return t;
case DataType.BAG:
- if (p.nextToken() != JsonToken.START_ARRAY) {
+ if (tok != JsonToken.START_ARRAY) {
warn("Bad bag field, could not find start of array, "
+ "field " + fieldnum, PigWarning.UDF_WARNING_1);
return null;
@@ -305,28 +296,29 @@ public class JsonLoader extends LoadFunc
JsonToken innerTok;
while ((innerTok = p.nextToken()) != JsonToken.END_ARRAY) {
- if (innerTok != JsonToken.START_OBJECT) {
- warn("Bad bag tuple field, could not find start of "
- + "object, field " + fieldnum,
PigWarning.UDF_WARNING_1);
- return null;
- }
-
t = tupleFactory.newTuple(fs.length);
- for (int j = 0; j < fs.length; j++) {
- t.set(j, readField(p, fs[j], j));
+ if (innerTok == JsonToken.START_OBJECT) {
+ for (int j = 0; j < fs.length; j++) {
+ t.set(j, readField(p, fs[j], j));
+ }
+
+ if (p.nextToken() != JsonToken.END_OBJECT) {
+ warn("Bad bag tuple field, could not find end of "
+ + "object, field " + fieldnum,
PigWarning.UDF_WARNING_1);
+ return null;
+ }
+ bag.add(t);
+ } else {
+
+ // handle array of kind [ primitive, primitive ... ]
+ t.set(0, readPrimitive(p, innerTok, fs[0]));
+ bag.add(t);
}
-
- if (p.nextToken() != JsonToken.END_OBJECT) {
- warn("Bad bag tuple field, could not find end of "
- + "object, field " + fieldnum,
PigWarning.UDF_WARNING_1);
- return null;
- }
- bag.add(t);
}
return bag;
+
default:
- throw new IOException("Unknown type in input schema: " +
- field.getType());
+ return readPrimitive(p, tok, field);
}
}
Modified: pig/trunk/test/org/apache/pig/test/TestJsonLoaderStorage.java
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestJsonLoaderStorage.java?rev=1649711&r1=1649710&r2=1649711&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestJsonLoaderStorage.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestJsonLoaderStorage.java Tue Jan 6
01:52:33 2015
@@ -99,6 +99,13 @@ public class TestJsonLoaderStorage {
"\"m\":[{\"a\":123},{\"a\":456},{\"a\":789}]" +
"}";
+ private static final String arraysJson =
+ "{" +
+ "\"s\":[\"abc\",\"def\",\"ghi\"]," +
+ "\"i\":[23,45,78]," +
+ "\"f\":[23.1,45.2,78.3]" +
+ "}";
+
private static final String nullJson =
"{" +
"\"a\":null," +
@@ -270,6 +277,24 @@ public class TestJsonLoaderStorage {
}
@SuppressWarnings("rawtypes")
+ @Test
+ public void testJsonLoaderArrays() throws IOException{
+
+ String arraysJsonFile = createInput(arraysJson);
+ pigServer.registerQuery("data = load '" + arraysJsonFile + "' using
JsonLoader('s:bag{a:tuple(a:chararray)}, i:bag{a:tuple(a:int)},
f:bag{a:tuple(a:double)}');");
+
+ Iterator<Tuple> tuples = pigServer.openIterator("data");
+
+ Tuple t = tuples.next();
+ assertTrue(t.size()==3);
+ assertTrue(t.get(0)!=null);
+ assertTrue(t.get(1)!=null);
+ assertTrue(t.get(2)!=null);
+ assertTrue(! tuples.hasNext());
+
+ }
+
+ @SuppressWarnings("rawtypes")
@Test
public void testJsonLoaderBigDecimalFormats() throws IOException{