Author: gates
Date: Thu Feb 2 17:23:58 2012
New Revision: 1239733
URL: http://svn.apache.org/viewvc?rev=1239733&view=rev
Log:
HCATALOG-204 HCatRecord SerDe
Added:
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
incubator/hcatalog/branches/branch-0.3/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java
Modified:
incubator/hcatalog/branches/branch-0.3/CHANGES.txt
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecord.java
Modified: incubator/hcatalog/branches/branch-0.3/CHANGES.txt
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.3/CHANGES.txt?rev=1239733&r1=1239732&r2=1239733&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.3/CHANGES.txt (original)
+++ incubator/hcatalog/branches/branch-0.3/CHANGES.txt Thu Feb 2 17:23:58 2012
@@ -23,6 +23,8 @@ Release 0.3.0 (unreleased changes)
INCOMPATIBLE CHANGES
NEW FEATURES
+ HCAT-204. HCatRecord SerDe (khorgath via gates)
+
HCAT-192. HBase output storage driver integration with zookeeper based
revision manager (toffer via hashutosh)
HCAT-191. HBase input storage driver integration with zookeeper based
revision manager (avandana via toffer)
@@ -191,7 +193,7 @@ Release 0.3.0 (unreleased changes)
HCAT-115. Superfluous warning on fresh install (ctdean via khorgath)
-Release 0.2.0
+Release 0.2.0 October 2, 2011
INCOMPATIBLE CHANGES
Modified:
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java?rev=1239733&r1=1239732&r2=1239733&view=diff
==============================================================================
---
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java
(original)
+++
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java
Thu Feb 2 17:23:58 2012
@@ -29,7 +29,7 @@ import org.apache.hcatalog.data.schema.H
public class DefaultHCatRecord extends HCatRecord {
- private final List<Object> contents;
+ private List<Object> contents;
public DefaultHCatRecord(){
contents = new ArrayList<Object>();
@@ -150,4 +150,9 @@ public class DefaultHCatRecord extends H
set(recordSchema.getPosition(fieldName),value);
}
+ @Override
+ // NOTE(review): this is a *shallow* copy — it aliases the list returned by
+ // r.getAll() rather than duplicating its contents, so later mutations made
+ // through either record are presumably visible in both. Confirm that a deep
+ // copy is not required by callers of copy().
+ public void copy(HCatRecord r) throws HCatException {
+ this.contents = r.getAll();
+ }
+
}
Modified:
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecord.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecord.java?rev=1239733&r1=1239732&r2=1239733&view=diff
==============================================================================
---
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecord.java
(original)
+++
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecord.java
Thu Feb 2 17:23:58 2012
@@ -35,6 +35,7 @@ public abstract class HCatRecord impleme
public abstract Object get(String fieldName, HCatSchema recordSchema)
throws HCatException;
public abstract void set(String fieldName, HCatSchema recordSchema, Object
value ) throws HCatException;
public abstract void remove(int idx) throws HCatException;
+ public abstract void copy(HCatRecord r) throws HCatException;
protected Object get(String fieldName, HCatSchema recordSchema, Class
clazz) throws HCatException{
// TODO : if needed, verify that recordschema entry for fieldname
matches appropriate type.
Added:
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java?rev=1239733&view=auto
==============================================================================
---
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java
(added)
+++
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java
Thu Feb 2 17:23:58 2012
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import
org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+/**
+ * StructObjectInspector for HCatRecord rows: field data is fetched straight
+ * out of the HCatRecord's positional storage (HCatRecord.get(int) /
+ * HCatRecord.getAll()) instead of a generic List or Object[].
+ */
+public class HCatRecordObjectInspector extends StandardStructObjectInspector {
+
+ // Constructor is protected: instances are obtained via
+ // HCatRecordObjectInspectorFactory, which caches one per row type.
+ protected HCatRecordObjectInspector(List<String> structFieldNames,
+ List<ObjectInspector> structFieldObjectInspectors) {
+ super(structFieldNames, structFieldObjectInspectors);
+ }
+
+ // Returns the value of a single field, located by the positional field id
+ // carried in the superclass's MyField struct-field reference.
+ @Override
+ public Object getStructFieldData(Object data, StructField fieldRef) {
+ if (data == null){
+ return null;
+ }
+
+ int fieldID = ((MyField) fieldRef).getFieldID();
+ assert (fieldID >= 0 && fieldID < fields.size());
+
+ return ((HCatRecord) data).get(fieldID);
+ }
+
+ // All field values at once: HCatRecord.getAll() already yields the
+ // positional List form this method must return.
+ @Override
+ public List<Object> getStructFieldsDataAsList(Object o) {
+ return ((HCatRecord) o).getAll();
+ }
+
+}
Added:
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java?rev=1239733&view=auto
==============================================================================
---
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java
(added)
+++
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java
Thu Feb 2 17:23:58 2012
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * ObjectInspectorFactory for HCatRecordObjectInspectors (and associated
helper inspectors)
+ */
+public class HCatRecordObjectInspectorFactory {
+
+ public static final Log LOG = LogFactory
+ .getLog(HCatRecordObjectInspectorFactory.class.getName());
+
+ static HashMap<TypeInfo, HCatRecordObjectInspector>
cachedHCatRecordObjectInspectors =
+ new HashMap<TypeInfo, HCatRecordObjectInspector>();
+ static HashMap<TypeInfo, ObjectInspector> cachedObjectInspectors =
+ new HashMap<TypeInfo, ObjectInspector>();
+
+ /**
+ * Returns HCatRecordObjectInspector given a StructTypeInfo type definition
for the record to look into
+ * @param typeInfo Type definition for the record to look into
+ * @return appropriate HCatRecordObjectInspector
+ * @throws SerDeException
+ */
+ public static HCatRecordObjectInspector getHCatRecordObjectInspector(
+ StructTypeInfo typeInfo) throws SerDeException {
+ HCatRecordObjectInspector oi =
cachedHCatRecordObjectInspectors.get(typeInfo);
+ if (oi == null) {
+ LOG.debug("Got asked for OI for
"+typeInfo.getCategory()+"["+typeInfo.getTypeName()+"]");
+
+ switch (typeInfo.getCategory()) {
+ case STRUCT :
+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+ List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ List<TypeInfo> fieldTypeInfos =
structTypeInfo.getAllStructFieldTypeInfos();
+ List<ObjectInspector> fieldObjectInspectors = new
ArrayList<ObjectInspector>(fieldTypeInfos.size());
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+
fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
+ }
+ oi = new HCatRecordObjectInspector(fieldNames,fieldObjectInspectors);
+ break;
+ default:
+ // Hmm.. not good,
+ // the only type expected here is STRUCT, which maps to HCatRecord
+ // - anything else is an error. Return null as the inspector.
+ throw new SerDeException("TypeInfo ["+typeInfo.getTypeName()
+ + "] was not of struct type - HCatRecord expected struct type, got
["
+ + typeInfo.getCategory().toString()+"]");
+ }
+ cachedHCatRecordObjectInspectors.put(typeInfo, oi);
+ }
+ return oi;
+ }
+
+ public static ObjectInspector
getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) {
+
+
+ ObjectInspector oi = cachedObjectInspectors.get(typeInfo);
+ if (oi == null){
+ LOG.debug("Got asked for OI for
"+typeInfo.getCategory()+"["+typeInfo.getTypeName()+"]");
+
+ switch (typeInfo.getCategory()) {
+ case PRIMITIVE:
+ oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+ ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
+ break;
+ case STRUCT:
+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+ List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ List<TypeInfo> fieldTypeInfos =
structTypeInfo.getAllStructFieldTypeInfos();
+ List<ObjectInspector> fieldObjectInspectors =
+ new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+
fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
+ }
+ oi = ObjectInspectorFactory.getStandardStructObjectInspector(
+ fieldNames, fieldObjectInspectors
+ );
+ break;
+ case LIST:
+ ObjectInspector elementObjectInspector =
getStandardObjectInspectorFromTypeInfo(
+ ((ListTypeInfo)typeInfo).getListElementTypeInfo());
+ oi =
ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector);
+ break;
+ case MAP:
+ ObjectInspector keyObjectInspector =
getStandardObjectInspectorFromTypeInfo(
+ ((MapTypeInfo)typeInfo).getMapKeyTypeInfo());
+ ObjectInspector valueObjectInspector =
getStandardObjectInspectorFromTypeInfo(
+ ((MapTypeInfo)typeInfo).getMapValueTypeInfo());
+ oi =
ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector,valueObjectInspector);
+ break;
+ default:
+ oi = null;
+ }
+ cachedObjectInspectors.put(typeInfo, oi);
+ }
+ return oi;
+ }
+
+
+}
Added:
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java?rev=1239733&view=auto
==============================================================================
---
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
(added)
+++
incubator/hcatalog/branches/branch-0.3/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
Thu Feb 2 17:23:58 2012
@@ -0,0 +1,277 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.Writable;
+import org.apache.hcatalog.common.HCatUtil;
+
+/**
+ * SerDe class for serializing to and from HCatRecord
+ */
+public class HCatRecordSerDe implements SerDe {
+
+ public static final Log LOG = LogFactory
+ .getLog(HCatRecordSerDe.class.getName());
+
+ public HCatRecordSerDe() throws SerDeException{
+ }
+
+ private List<String> columnNames;
+ private List<TypeInfo> columnTypes;
+ private StructTypeInfo rowTypeInfo;
+
+ private HCatRecordObjectInspector cachedObjectInspector;
+
+ @Override
+ public void initialize(Configuration conf, Properties tbl)
+ throws SerDeException {
+
+ if (LOG.isDebugEnabled()){
+ LOG.debug("Initializing HCatRecordSerDe");
+ HCatUtil.logEntrySet(LOG, "props to serde", tbl.entrySet());
+ }
+
+ // Get column names and types
+ String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+ String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+
+ // all table column names
+ if (columnNameProperty.length() == 0) {
+ columnNames = new ArrayList<String>();
+ } else {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ }
+
+ // all column types
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
+ columnTypes =
TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+
+ if (LOG.isDebugEnabled()){
+ LOG.debug("columns:" + columnNameProperty);
+ for (String s : columnNames){
+ LOG.debug("cn:"+s);
+ }
+ LOG.debug("types: " + columnTypeProperty);
+ for (TypeInfo t : columnTypes){
+ LOG.debug("ct:"+t.getTypeName()+",type:"+t.getCategory());
+ }
+ }
+
+
+ assert (columnNames.size() == columnTypes.size());
+
+ rowTypeInfo = (StructTypeInfo)
TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+
+ cachedObjectInspector =
HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
+
+ }
+
+ /**
+ * The purpose of a deserialize method is to turn a data blob
+ * which is a writable representation of the data into an
+ * object that can then be parsed using the appropriate
+ * ObjectInspector. In this case, since HCatRecord is directly
+ * already the Writable object, there's no extra work to be done
+ * here. Most of the logic resides in the ObjectInspector to be
+ * able to return values from within the HCatRecord to hive when
+ * it wants it.
+ */
+ @Override
+ public Object deserialize(Writable data) throws SerDeException {
+ if (!(data instanceof HCatRecord)) {
+ throw new SerDeException(getClass().getName() + ": expects HCatRecord!");
+ }
+
+ return (HCatRecord) data;
+ }
+
+ /**
+ * The purpose of the serialize method is to turn an object-representation
+ * with a provided ObjectInspector into a Writable format, which
+ * the underlying layer can then use to write out.
+ *
+ * In this case, it means that Hive will call this method to convert
+ * an object with appropriate objectinspectors that it knows about,
+ * to write out a HCatRecord.
+ */
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector)
+ throws SerDeException {
+ if (objInspector.getCategory() != Category.STRUCT) {
+ throw new SerDeException(getClass().toString()
+ + " can only serialize struct types, but we got: "
+ + objInspector.getTypeName());
+ }
+ return new
DefaultHCatRecord((List<Object>)serializeStruct(obj,(StructObjectInspector)objInspector));
+ }
+
+
+ /**
+ * Return serialized HCatRecord from an underlying
+ * object-representation, and readable by an ObjectInspector
+ * @param obj : Underlying object-representation
+ * @param soi : StructObjectInspector
+ * @return HCatRecord
+ */
+ private List<?> serializeStruct(Object obj, StructObjectInspector soi)
+ throws SerDeException {
+
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ List<Object> list = soi.getStructFieldsDataAsList(obj);
+
+ List<Object> l = new ArrayList<Object>(fields.size());
+
+ if (fields != null){
+ for (int i = 0; i < fields.size(); i++) {
+
+ // Get the field objectInspector and the field object.
+ ObjectInspector foi = fields.get(i).getFieldObjectInspector();
+ Object f = (list == null ? null : list.get(i));
+ Object res = serializeField(f, foi);
+ l.add(i, res);
+ }
+ }
+ return l;
+ }
+
+ /**
+ * Return underlying Java Object from an object-representation
+ * that is readable by a provided ObjectInspector.
+ */
+ private Object serializeField(Object field,
+ ObjectInspector fieldObjectInspector) throws SerDeException {
+ Object res = null;
+ if (fieldObjectInspector.getCategory() == Category.PRIMITIVE){
+ res =
((PrimitiveObjectInspector)fieldObjectInspector).getPrimitiveJavaObject(field);
+ } else if (fieldObjectInspector.getCategory() == Category.STRUCT){
+ res = serializeStruct(field,(StructObjectInspector)fieldObjectInspector);
+ } else if (fieldObjectInspector.getCategory() == Category.LIST){
+ res = serializeList(field,(ListObjectInspector)fieldObjectInspector);
+ } else if (fieldObjectInspector.getCategory() == Category.MAP){
+ res = serializeMap(field,(MapObjectInspector)fieldObjectInspector);
+ } else {
+ throw new SerDeException(getClass().toString()
+ + " does not know what to do with fields of unknown category: "
+ + fieldObjectInspector.getCategory() + " , type: " +
fieldObjectInspector.getTypeName());
+ }
+ return res;
+ }
+
+ /**
+ * Helper method to return underlying Java Map from
+ * an object-representation that is readable by a provided
+ * MapObjectInspector
+ */
+ private Map<?,?> serializeMap(Object f, MapObjectInspector moi) throws
SerDeException {
+ ObjectInspector koi = moi.getMapKeyObjectInspector();
+ ObjectInspector voi = moi.getMapValueObjectInspector();
+ Map<Object,Object> m = new TreeMap<Object, Object>();
+
+ Map<?, ?> readMap = moi.getMap(f);
+ if (readMap == null) {
+ return null;
+ } else {
+ for (Map.Entry<?, ?> entry: readMap.entrySet()) {
+ m.put(serializeField(entry.getKey(),koi),
serializeField(entry.getValue(),voi));
+ }
+ }
+ return m;
+ }
+
+ private List<?> serializeList(Object f, ListObjectInspector loi) throws
SerDeException {
+ List l = loi.getList(f);
+ ObjectInspector eloi = loi.getListElementObjectInspector();
+ if (eloi.getCategory() == Category.PRIMITIVE){
+ return l;
+ } else if (eloi.getCategory() == Category.STRUCT){
+ List<List<?>> list = new ArrayList<List<?>>(l.size());
+ for (int i = 0 ; i < l.size() ; i++ ){
+ list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi));
+ }
+ return list;
+ } else if (eloi.getCategory() == Category.LIST){
+ List<List<?>> list = new ArrayList<List<?>>(l.size());
+ for (int i = 0 ; i < l.size() ; i++ ){
+ list.add(serializeList(l.get(i), (ListObjectInspector) eloi));
+ }
+ } else if (eloi.getCategory() == Category.MAP){
+ List<Map<?,?>> list = new ArrayList<Map<?,?>>(l.size());
+ for (int i = 0 ; i < l.size() ; i++ ){
+ list.add(serializeMap(l.get(i), (MapObjectInspector) eloi));
+ }
+ throw new SerDeException("HCatSerDe map type unimplemented");
+ } else {
+ throw new SerDeException(getClass().toString()
+ + " does not know what to do with fields of unknown category: "
+ + eloi.getCategory() + " , type: " + eloi.getTypeName());
+ }
+ return l;
+ }
+
+ /**
+ * Return an object inspector that can read through the object
+ * that we return from deserialize(). To wit, that means we need
+ * to return an ObjectInspector that can read HCatRecord, given
+ * the type info for it during initialize(). This also means
+ * that this method cannot and should not be called before initialize()
+ */
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return (ObjectInspector) cachedObjectInspector;
+ }
+
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return HCatRecord.class;
+ }
+
+ @Override
+ public SerDeStats getSerDeStats() {
+ // no support for statistics yet
+ return null;
+ }
+
+}
Added:
incubator/hcatalog/branches/branch-0.3/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.3/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java?rev=1239733&view=auto
==============================================================================
---
incubator/hcatalog/branches/branch-0.3/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java
(added)
+++
incubator/hcatalog/branches/branch-0.3/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java
Thu Feb 2 17:23:58 2012
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.io.Writable;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+/**
+ * Round-trip tests for HCatRecordSerDe: builds one HCatRecord covering every
+ * supported category (primitives, struct, list, map, and a deeply nested
+ * array-of-struct), serializes and deserializes it repeatedly, and also
+ * cross-checks against LazySimpleSerDe. NOTE: the order of values added to
+ * rlist must exactly match the column list and typeString below.
+ */
+public class TestHCatRecordSerDe extends TestCase{
+
+ // Returns a map of table-properties -> matching HCatRecord test fixture.
+ public Map<Properties,HCatRecord> getData(){
+ Map<Properties,HCatRecord> data = new HashMap<Properties,HCatRecord>();
+
+ List<Object> rlist = new ArrayList<Object>(11);
+ rlist.add(new Byte("123"));
+ rlist.add(new Short("456"));
+ rlist.add(new Integer(789));
+ rlist.add(new Long(1000L));
+ rlist.add(new Double(5.3D));
+ rlist.add(new Float(2.39F));
+ rlist.add(new String("hcat and hadoop"));
+ rlist.add(null);
+
+ // struct<a:string,b:string> is represented as a positional List.
+ List<Object> innerStruct = new ArrayList<Object>(2);
+ innerStruct.add(new String("abc"));
+ innerStruct.add(new String("def"));
+ rlist.add(innerStruct);
+
+ List<Integer> innerList = new ArrayList<Integer>();
+ innerList.add(314);
+ // NOTE(review): 007 is an octal int literal — value 7.
+ innerList.add(007);
+ rlist.add(innerList);
+
+ Map<Short, String> map = new HashMap<Short, String>(3);
+ map.put(new Short("2"), "hcat is cool");
+ map.put(new Short("3"), "is it?");
+ map.put(new Short("4"), "or is it not?");
+ rlist.add(map);
+
+ rlist.add(new Boolean(true));
+
+ // Deeply nested fixture for
+ // array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>.
+ List<Object> c1 = new ArrayList<Object>();
+ List<Object> c1_1 = new ArrayList<Object>();
+ c1_1.add(new Integer(12));
+ List<Object> i2 = new ArrayList<Object>();
+ List<Integer> ii1 = new ArrayList<Integer>();
+ ii1.add(new Integer(13));
+ ii1.add(new Integer(14));
+ i2.add(ii1);
+ Map<String,List<?>> ii2 = new HashMap<String,List<?>>();
+ List<Integer> iii1 = new ArrayList<Integer>();
+ iii1.add(new Integer(15));
+ ii2.put("phew", iii1);
+ i2.add(ii2);
+ c1_1.add(i2);
+ c1.add(c1_1);
+ rlist.add(c1);
+
+ String typeString =
+ "tinyint,smallint,int,bigint,double,float,string,string,"
+ + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
+ +
"array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>";
+ Properties props = new Properties();
+
+ props.put(Constants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1");
+ props.put(Constants.LIST_COLUMN_TYPES, typeString);
+
+ data.put(props, new DefaultHCatRecord(rlist));
+ return data;
+ }
+
+ // Serialize/deserialize each fixture through HCatRecordSerDe (twice), then
+ // through LazySimpleSerDe, asserting equality is preserved by the former.
+ public void testRW() throws Exception {
+
+ Configuration conf = new Configuration();
+
+ for (Entry<Properties,HCatRecord> e : getData().entrySet()){
+ Properties tblProps = e.getKey();
+ HCatRecord r = e.getValue();
+
+ HCatRecordSerDe hrsd = new HCatRecordSerDe();
+ hrsd.initialize(conf, tblProps);
+
+ System.out.println("ORIG:"+r.toString());
+
+ Writable s = hrsd.serialize(r,hrsd.getObjectInspector());
+ System.out.println("ONE:"+s.toString());
+
+ HCatRecord r2 = (HCatRecord) hrsd.deserialize(s);
+ Assert.assertTrue(r.equals(r2));
+
+ // If it went through correctly, then s is also a HCatRecord,
+ // and also equal to the above, and a deepcopy, and this holds
+ // through for multiple levels more of serialization as well.
+
+ Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector());
+ System.out.println("TWO:"+s2.toString());
+ Assert.assertTrue(r.equals((HCatRecord)s));
+ Assert.assertTrue(r.equals((HCatRecord)s2));
+
+ // serialize using another serde, and read out that object repr.
+ LazySimpleSerDe testSD = new LazySimpleSerDe();
+ testSD.initialize(conf, tblProps);
+
+ Writable s3 = testSD.serialize(s, hrsd.getObjectInspector());
+ System.out.println("THREE:"+s3.toString());
+ Object o3 = testSD.deserialize(s3);
+ Assert.assertFalse(r.getClass().equals(o3.getClass()));
+
+ // then serialize again using hrsd, and compare results
+ HCatRecord s4 = (HCatRecord) hrsd.serialize(o3,
testSD.getObjectInspector());
+ System.out.println("FOUR:"+s4.toString());
+ // NOTE(review): inequality is asserted here — presumably the text-based
+ // round-trip through LazySimpleSerDe changes field types/null handling so
+ // the records no longer compare equal. Confirm this is the intent.
+ Assert.assertFalse(r.equals(s4));
+
+ }
+
+ }
+
+}