Author: gates
Date: Wed Feb 22 22:56:49 2012
New Revision: 1292568
URL: http://svn.apache.org/viewvc?rev=1292568&view=rev
Log:
HCATALOG-249 Rework JSON StorageDriver into a JSON SerDe
Added:
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java
Modified:
incubator/hcatalog/trunk/CHANGES.txt
incubator/hcatalog/trunk/build.xml
incubator/hcatalog/trunk/ivy.xml
incubator/hcatalog/trunk/ivy/libraries.properties
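
For reviewers: the new SerDe follows the standard Hive SerDe lifecycle (initialize with table
properties, then serialize/deserialize against Text). Below is a minimal round-trip sketch,
condensed from the TestJsonSerDe added in this commit; the JsonSerDeExample driver class, the
two-column schema "s,i", and the sample JSON are illustrative only and not part of the commit:

    import java.util.Properties;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde.Constants;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;
    import org.apache.hcatalog.data.JsonSerDe;

    public class JsonSerDeExample {
        public static void main(String[] args) throws Exception {
            // column names and types normally come from the table definition
            Properties tblProps = new Properties();
            tblProps.put(Constants.LIST_COLUMNS, "s,i");
            tblProps.put(Constants.LIST_COLUMN_TYPES, "string,int");

            JsonSerDe jsde = new JsonSerDe();
            jsde.initialize(new Configuration(), tblProps);

            // JSON text -> HCatRecord -> JSON text
            Object record = jsde.deserialize(new Text("{\"s\":\"hcat\",\"i\":7}"));
            Writable out = jsde.serialize(record, jsde.getObjectInspector());
            System.out.println(out);
        }
    }
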
Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1292568&r1=1292567&r2=1292568&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Wed Feb 22 22:56:49 2012
@@ -28,6 +28,8 @@ Trunk (unreleased changes)
HCAT-240. Changes to HCatOutputFormat to make it use SerDes instead of
StorageDriver (toffer)
NEW FEATURES
+ HCAT-249 Rework JSON StorageDriver into a JSON SerDe (khorgath via gates)
+
HCAT-255 Define hadoop properties on the hcat command line (ctdean via gates)
HCAT-2 Support nested schema conversion between Hive and Pig (julienledem via
hashutosh)
Modified: incubator/hcatalog/trunk/build.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/build.xml?rev=1292568&r1=1292567&r2=1292568&view=diff
==============================================================================
--- incubator/hcatalog/trunk/build.xml (original)
+++ incubator/hcatalog/trunk/build.xml Wed Feb 22 22:56:49 2012
@@ -471,6 +471,8 @@
<include name="commons-logging-*.jar"/>
<include name="commons-logging-api-*.jar"/>
<include name="commons-pool-*.jar"/>
+ <include name="jackson-mapper-asl-*.jar"/>
+ <include name="jackson-core-asl-*.jar"/>
<include name="datanucleus-connectionpool-*.jar"/>
<include name="datanucleus-core-*.jar"/>
<include name="datanucleus-enhancer-*.jar"/>
Modified: incubator/hcatalog/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/ivy.xml?rev=1292568&r1=1292567&r2=1292568&view=diff
==============================================================================
--- incubator/hcatalog/trunk/ivy.xml (original)
+++ incubator/hcatalog/trunk/ivy.xml Wed Feb 22 22:56:49 2012
@@ -51,5 +51,7 @@
<dependency org="javax.management.j2ee" name="management-api"
rev="${javax-mgmt.version}" conf="common->master" />
<dependency org="com.google.code.p.arat" name="rat-lib"
rev="${rats-lib.version}" conf="releaseaudit->default"/>
<dependency org="org.vafer" name="jdeb" rev="${jdeb.version}"
conf="package->master"/>
+ <dependency org="org.codehaus.jackson" name="jackson-mapper-asl"
rev="${jackson.version}" conf="common->master"/>
+ <dependency org="org.codehaus.jackson" name="jackson-core-asl"
rev="${jackson.version}" conf="common->master"/>
</dependencies>
</ivy-module>
Modified: incubator/hcatalog/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/ivy/libraries.properties?rev=1292568&r1=1292567&r2=1292568&view=diff
==============================================================================
--- incubator/hcatalog/trunk/ivy/libraries.properties (original)
+++ incubator/hcatalog/trunk/ivy/libraries.properties Wed Feb 22 22:56:49 2012
@@ -24,3 +24,4 @@ activemq.version=5.5.0
javax-mgmt.version=1.1-rev-1
rats-lib.version=0.5.1
jdeb.version=0.8
+jackson.version=1.7.3
Added: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java?rev=1292568&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java (added)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java Wed Feb 22 22:56:49 2012
@@ -0,0 +1,517 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hcatalog.common.HCatException;
+import org.apache.hcatalog.common.HCatUtil;
+import org.apache.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hcatalog.data.schema.HCatFieldSchema.Type;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.data.schema.HCatSchemaUtils;
+
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonParseException;
+import org.codehaus.jackson.JsonParser;
+import org.codehaus.jackson.JsonToken;
+
+public class JsonSerDe implements SerDe {
+
+ public static final Log LOG = LogFactory
+ .getLog(JsonSerDe.class.getName());
+
+ private List<String> columnNames;
+ private List<TypeInfo> columnTypes;
+
+ private StructTypeInfo rowTypeInfo;
+ private HCatSchema schema;
+
+ private JsonFactory jsonFactory = null;
+
+ private HCatRecordObjectInspector cachedObjectInspector;
+
+ @Override
+ public void initialize(Configuration conf, Properties tbl)
+ throws SerDeException {
+
+ if (LOG.isDebugEnabled()){
+ LOG.debug("Initializing JsonSerDe");
+ HCatUtil.logEntrySet(LOG, "props to serde", tbl.entrySet());
+ }
+
+ // Get column names and types
+ String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+ String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+
+ // all table column names
+ if (columnNameProperty.length() == 0) {
+ columnNames = new ArrayList<String>();
+ } else {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ }
+
+ // all column types
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+
+ if (LOG.isDebugEnabled()){
+ LOG.debug("columns:" + columnNameProperty);
+ for (String s : columnNames){
+ LOG.debug("cn:"+s);
+ }
+ LOG.debug("types: " + columnTypeProperty);
+ for (TypeInfo t : columnTypes){
+ LOG.debug("ct:"+t.getTypeName()+",type:"+t.getCategory());
+ }
+ }
+
+ assert (columnNames.size() == columnTypes.size());
+
rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+
cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
+ try {
schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
+ if (LOG.isDebugEnabled()){
+ LOG.debug("schema : "+ schema);
+ LOG.debug("\tfields : "+schema.getFieldNames());
+ }
+ } catch (HCatException e) {
+ throw new SerDeException(e);
+ }
+
+ jsonFactory = new JsonFactory();
+ }
+
+ /**
+ * Takes a JSON string in Text form and returns an object representation of it
+ * that is readable by the corresponding object inspector.
+ *
+ * For this implementation, since we're using the Jackson parser, we can construct
+ * our own object representation, and we use HCatRecord for it.
+ */
+ @Override
+ public Object deserialize(Writable blob) throws SerDeException {
+
+ Text t = (Text)blob;
+ JsonParser p;
+ List<Object> r = new ArrayList<Object>(Collections.nCopies(columnNames.size(), null));
+ try {
+ // bound the stream by getLength(): Text.getBytes() may return a backing array longer than the data
+ p = jsonFactory.createJsonParser(new ByteArrayInputStream(t.getBytes(), 0, t.getLength()));
+ if (p.nextToken() != JsonToken.START_OBJECT) {
+ throw new IOException("Start token not found where expected");
+ }
+ JsonToken token;
+ while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) {
+ // iterate through each token, and create appropriate object here.
+ populateRecord(r,token,p,schema);
+ }
+ } catch (JsonParseException e) {
+ LOG.warn("Error ["+ e.getMessage()+"] parsing json text ["+t+"]");
+ throw new SerDeException(e);
+ } catch (IOException e) {
+ LOG.warn("Error ["+ e.getMessage()+"] parsing json text ["+t+"]");
+ throw new SerDeException(e);
+ }
+
+ return new DefaultHCatRecord(r);
+ }
+
+ private void populateRecord(List<Object> r, JsonToken token, JsonParser p, HCatSchema s) throws IOException {
+ if (token != JsonToken.FIELD_NAME){
+ throw new IOException("Field name expected");
+ }
+ String fieldName = p.getText();
+ int fpos = s.getPosition(fieldName);
+ HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
+
+ r.set(fpos,extractCurrentField(p, null, hcatFieldSchema,false));
+ }
+
+ /**
+ * Utility method to extract the current expected field from the given JsonParser.
+ *
+ * To get the field, we need either a type or an hcatFieldSchema (the latter is
+ * necessary for complex types); if one of them is null, it is derived from the other.
+ *
+ * isTokenCurrent indicates whether the JsonParser is already positioned at the
+ * token we expect to read next, or needs advancing to the next token before we read.
+ */
+ private Object extractCurrentField(JsonParser p, Type t,
+ HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException,
+ HCatException {
+ Object val = null;
+ JsonToken valueToken;
+ if (isTokenCurrent){
+ valueToken = p.getCurrentToken();
+ } else {
+ valueToken = p.nextToken();
+ }
+
+ if (hcatFieldSchema != null){
+ t = hcatFieldSchema.getType();
+ }
+ switch(t) {
+ case INT:
+ val = (valueToken == JsonToken.VALUE_NULL)?null:p.getIntValue();
+ break;
+ case TINYINT:
+ val = (valueToken == JsonToken.VALUE_NULL)?null:p.getByteValue();
+ break;
+ case SMALLINT:
+ val = (valueToken == JsonToken.VALUE_NULL)?null:p.getShortValue();
+ break;
+ case BIGINT:
+ val = (valueToken == JsonToken.VALUE_NULL)?null:p.getLongValue();
+ break;
+ case BOOLEAN:
+ String bval = (valueToken == JsonToken.VALUE_NULL)?null:p.getText();
+ // a JSON null maps to a null Boolean; comparing unconditionally would NPE
+ val = (bval == null) ? null : Boolean.valueOf(bval.equalsIgnoreCase("true"));
+ break;
+ case FLOAT:
+ val = (valueToken == JsonToken.VALUE_NULL)?null:p.getFloatValue();
+ break;
+ case DOUBLE:
+ val = (valueToken == JsonToken.VALUE_NULL)?null:p.getDoubleValue();
+ break;
+ case STRING:
+ val = (valueToken == JsonToken.VALUE_NULL)?null:p.getText();
+ break;
+ case BINARY:
+ throw new IOException("JsonSerDe does not support BINARY type");
+ case ARRAY:
+ if (valueToken != JsonToken.START_ARRAY){
+ throw new IOException("Start of Array expected");
+ }
+ List<Object> arr = new ArrayList<Object>();
+ while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
+ arr.add(extractCurrentField(p, null, hcatFieldSchema.getArrayElementSchema().get(0), true));
+ }
+ val = arr;
+ break;
+ case MAP:
+ if (valueToken != JsonToken.START_OBJECT){
+ throw new IOException("Start of Object expected");
+ }
+ Map<Object,Object> map = new LinkedHashMap<Object,Object>();
+ Type keyType = hcatFieldSchema.getMapKeyType();
+ HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
+ while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
+ Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), keyType);
+ Object v;
+ if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT){
+ v = extractCurrentField(p,null, valueSchema,false);
+ } else {
+ v = extractCurrentField(p,null, valueSchema,true);
+ }
+
+ map.put(k, v);
+ }
+ val = map;
+ break;
+ case STRUCT:
+ if (valueToken != JsonToken.START_OBJECT){
+ throw new IOException("Start of Object expected");
+ }
+ HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
+ int sz = subSchema.getFieldNames().size();
+
+ List<Object> struct = new ArrayList<Object>(Collections.nCopies(sz, null));
+ while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
+ populateRecord(struct, valueToken, p, subSchema);
+ }
+ val = struct;
+ break;
+ }
+ return val;
+ }
+
+ private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException {
+ switch(t) {
+ case INT:
+ return Integer.valueOf(s);
+ case TINYINT:
+ return Byte.valueOf(s);
+ case SMALLINT:
+ return Short.valueOf(s);
+ case BIGINT:
+ return Long.valueOf(s);
+ case BOOLEAN:
+ return (s.equalsIgnoreCase("true"));
+ case FLOAT:
+ return Float.valueOf(s);
+ case DOUBLE:
+ return Double.valueOf(s);
+ case STRING:
+ return s;
+ case BINARY:
+ throw new IOException("JsonSerDe does not support BINARY type");
+ }
+ throw new IOException("Could not convert from string to map type "+t);
+ }
+
+ /**
+ * Given an object and object inspector pair, traverse the object
+ * and generate a Text representation of the object.
+ */
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector)
+ throws SerDeException {
+ StringBuilder sb = new StringBuilder();
+ try {
+ buildJSONString(sb, obj, objInspector);
+ } catch (IOException e) {
+ LOG.warn("Error ["+ e.getMessage()+"] generating json text from object");
+ throw new SerDeException(e);
+ }
+ return new Text(sb.toString());
+ }
+
+ // TODO : this section is copied over from SerDeUtils because the json production there is
+ // non-standard - it should use quotes for all field names. We should fix it there, and then
+ // remove this copy.
+ // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES
+ // for details - trying to enable Jackson to tolerate unquoted field names does not seem to
+ // work (compilation failure when attempting to use that feature), so we change the production itself.
+ // Also, this copy throws IOException when a BINARY type is detected.
+ private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException {
+
+ switch (oi.getCategory()) {
+ case PRIMITIVE: {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+ if (o == null) {
+ sb.append("null");
+ } else {
+ switch (poi.getPrimitiveCategory()) {
+ case BOOLEAN: {
+ boolean b = ((BooleanObjectInspector) poi).get(o);
+ sb.append(b ? "true" : "false");
+ break;
+ }
+ case BYTE: {
+ sb.append(((ByteObjectInspector) poi).get(o));
+ break;
+ }
+ case SHORT: {
+ sb.append(((ShortObjectInspector) poi).get(o));
+ break;
+ }
+ case INT: {
+ sb.append(((IntObjectInspector) poi).get(o));
+ break;
+ }
+ case LONG: {
+ sb.append(((LongObjectInspector) poi).get(o));
+ break;
+ }
+ case FLOAT: {
+ sb.append(((FloatObjectInspector) poi).get(o));
+ break;
+ }
+ case DOUBLE: {
+ sb.append(((DoubleObjectInspector) poi).get(o));
+ break;
+ }
+ case STRING: {
+ sb.append('"');
+ sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi)
+ .getPrimitiveJavaObject(o)));
+ sb.append('"');
+ break;
+ }
+ case TIMESTAMP: {
+ sb.append('"');
+ sb.append(((TimestampObjectInspector) poi)
+ .getPrimitiveWritableObject(o));
+ sb.append('"');
+ break;
+ }
+ case BINARY: {
+ throw new IOException("JsonSerDe does not support BINARY type");
+ }
+ default:
+ throw new RuntimeException("Unknown primitive type: "
+ + poi.getPrimitiveCategory());
+ }
+ }
+ break;
+ }
+ case LIST: {
+ ListObjectInspector loi = (ListObjectInspector) oi;
+ ObjectInspector listElementObjectInspector = loi
+ .getListElementObjectInspector();
+ List<?> olist = loi.getList(o);
+ if (olist == null) {
+ sb.append("null");
+ } else {
+ sb.append(SerDeUtils.LBRACKET);
+ for (int i = 0; i < olist.size(); i++) {
+ if (i > 0) {
+ sb.append(SerDeUtils.COMMA);
+ }
+ buildJSONString(sb, olist.get(i), listElementObjectInspector);
+ }
+ sb.append(SerDeUtils.RBRACKET);
+ }
+ break;
+ }
+ case MAP: {
+ MapObjectInspector moi = (MapObjectInspector) oi;
+ ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
+ ObjectInspector mapValueObjectInspector = moi
+ .getMapValueObjectInspector();
+ Map<?, ?> omap = moi.getMap(o);
+ if (omap == null) {
+ sb.append("null");
+ } else {
+ sb.append(SerDeUtils.LBRACE);
+ boolean first = true;
+ for (Object entry : omap.entrySet()) {
+ if (first) {
+ first = false;
+ } else {
+ sb.append(SerDeUtils.COMMA);
+ }
+ Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
+ StringBuilder keyBuilder = new StringBuilder();
+ buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector);
+ String keyString = keyBuilder.toString().trim();
+ boolean doQuoting = (!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE);
+ if (doQuoting ){
+ sb.append(SerDeUtils.QUOTE);
+ }
+ sb.append(keyString);
+ if (doQuoting ){
+ sb.append(SerDeUtils.QUOTE);
+ }
+ sb.append(SerDeUtils.COLON);
+ buildJSONString(sb, e.getValue(), mapValueObjectInspector);
+ }
+ sb.append(SerDeUtils.RBRACE);
+ }
+ break;
+ }
+ case STRUCT: {
+ StructObjectInspector soi = (StructObjectInspector) oi;
+ List<? extends StructField> structFields = soi.getAllStructFieldRefs();
+ if (o == null) {
+ sb.append("null");
+ } else {
+ sb.append(SerDeUtils.LBRACE);
+ for (int i = 0; i < structFields.size(); i++) {
+ if (i > 0) {
+ sb.append(SerDeUtils.COMMA);
+ }
+ sb.append(SerDeUtils.QUOTE);
+ sb.append(structFields.get(i).getFieldName());
+ sb.append(SerDeUtils.QUOTE);
+ sb.append(SerDeUtils.COLON);
+ buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)),
+ structFields.get(i).getFieldObjectInspector());
+ }
+ sb.append(SerDeUtils.RBRACE);
+ }
+ break;
+ }
+ case UNION: {
+ UnionObjectInspector uoi = (UnionObjectInspector) oi;
+ if (o == null) {
+ sb.append("null");
+ } else {
+ sb.append(SerDeUtils.LBRACE);
+ sb.append(uoi.getTag(o));
+ sb.append(SerDeUtils.COLON);
+ buildJSONString(sb, uoi.getField(o),
+ uoi.getObjectInspectors().get(uoi.getTag(o)));
+ sb.append(SerDeUtils.RBRACE);
+ }
+ break;
+ }
+ default:
+ throw new RuntimeException("Unknown type in ObjectInspector!");
+ }
+ }
+
+
+ /**
+ * Returns an object inspector for the specified schema that
+ * is capable of reading in the object representation of the JSON string
+ */
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return cachedObjectInspector;
+ }
+
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return Text.class;
+ }
+
+ @Override
+ public SerDeStats getSerDeStats() {
+ // no support for statistics yet
+ return null;
+ }
+
+}
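
To illustrate the wire format this SerDe reads and writes (example data only, not part of the
commit): assuming columns declared as "s string, n array<int>, m map<string,string>", serialize()
emits one JSON object per row along the lines of

    {"s":"hcat","n":[3,14],"m":{"k":"v"}}

with every struct field name quoted. Producing those quotes is exactly why buildJSONString above
was copied from SerDeUtils and adjusted rather than reused.
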
Added: incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java?rev=1292568&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java (added)
+++ incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java Wed Feb 22 22:56:49 2012
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.io.Writable;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+public class TestJsonSerDe extends TestCase{
+
+ public Map<Properties,HCatRecord> getData(){
+ Map<Properties,HCatRecord> data = new HashMap<Properties,HCatRecord>();
+
+ List<Object> rlist = new ArrayList<Object>(11);
+ rlist.add(new Byte("123"));
+ rlist.add(new Short("456"));
+ rlist.add(new Integer(789));
+ rlist.add(new Long(1000L));
+ rlist.add(new Double(5.3D));
+ rlist.add(new Float(2.39F));
+ rlist.add(new String("hcat and hadoop"));
+ rlist.add(null);
+
+ List<Object> innerStruct = new ArrayList<Object>(2);
+ innerStruct.add(new String("abc"));
+ innerStruct.add(new String("def"));
+ rlist.add(innerStruct);
+
+ List<Integer> innerList = new ArrayList<Integer>();
+ innerList.add(314);
+ innerList.add(007);
+ rlist.add(innerList);
+
+ Map<Short, String> map = new HashMap<Short, String>(3);
+ map.put(new Short("2"), "hcat is cool");
+ map.put(new Short("3"), "is it?");
+ map.put(new Short("4"), "or is it not?");
+ rlist.add(map);
+
+ rlist.add(new Boolean(true));
+
+ List<Object> c1 = new ArrayList<Object>();
+ List<Object> c1_1 = new ArrayList<Object>();
+ c1_1.add(new Integer(12));
+ List<Object> i2 = new ArrayList<Object>();
+ List<Integer> ii1 = new ArrayList<Integer>();
+ ii1.add(new Integer(13));
+ ii1.add(new Integer(14));
+ i2.add(ii1);
+ Map<String,List<?>> ii2 = new HashMap<String,List<?>>();
+ List<Integer> iii1 = new ArrayList<Integer>();
+ iii1.add(new Integer(15));
+ ii2.put("phew", iii1);
+ i2.add(ii2);
+ c1_1.add(i2);
+ c1.add(c1_1);
+ rlist.add(c1);
+
+ String typeString =
+ "tinyint,smallint,int,bigint,double,float,string,string,"
+ + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
+ + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>";
+ Properties props = new Properties();
+
+ props.put(Constants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1");
+ props.put(Constants.LIST_COLUMN_TYPES, typeString);
+// props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
+// props.put(Constants.SERIALIZATION_FORMAT, "1");
+
+ data.put(props, new DefaultHCatRecord(rlist));
+ return data;
+ }
+
+ public void testRW() throws Exception {
+
+ Configuration conf = new Configuration();
+
+ for (Entry<Properties,HCatRecord> e : getData().entrySet()){
+ Properties tblProps = e.getKey();
+ HCatRecord r = e.getValue();
+
+ HCatRecordSerDe hrsd = new HCatRecordSerDe();
+ hrsd.initialize(conf, tblProps);
+
+ JsonSerDe jsde = new JsonSerDe();
+ jsde.initialize(conf, tblProps);
+
+ System.out.println("ORIG:"+r.toString());
+
+ Writable s = hrsd.serialize(r,hrsd.getObjectInspector());
+ System.out.println("ONE:"+s.toString());
+
+ Object o1 = hrsd.deserialize(s);
+ assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1));
+
+ Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector());
+ System.out.println("TWO:"+s2.toString());
+ Object o2 = jsde.deserialize(s2);
+ System.out.println("deserialized TWO : "+o2);
+
+ assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2));
+ }
+
+ }
+
+}