My team and I have been trying, with limited success, to use the COMMENT feature of Hive columns to maintain documentation for the tables and columns in our data warehouse built on Hive. However, we use a number of custom and non-native SerDes, and for tables that use them the column comments always get overwritten with the string "from deserializer".

I may have found a way to work around this from within Hive, but I want to get some insight from the hive-dev community to figure out whether this is a patently bad idea and we are just setting ourselves up for pain later on.

I won't go into all the details, but it seems to work in our (so far) limited testing. Note, however, that we are running Hive 0.7.1, whereas the patch I am sending is against master/HEAD.

Please let me know whether this is an acceptable approach to preserving column comments with non-native SerDes!


diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 7524484..7ea77f1 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -1947,19 +1947,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         } catch (NoSuchObjectException e) {
           throw new UnknownTableException(e.getMessage());
         }
-        boolean getColsFromSerDe = SerDeUtils.shouldGetColsFromSerDe(
-            tbl.getSd().getSerdeInfo().getSerializationLib());
-        if (!getColsFromSerDe) {
-          ret = tbl.getSd().getCols();
-        } else {
-          try {
-            Deserializer s = MetaStoreUtils.getDeserializer(hiveConf, tbl);
-            ret = MetaStoreUtils.getFieldsFromDeserializer(tableName, s);
-          } catch (SerDeException e) {
-            StringUtils.stringifyException(e);
-            throw new MetaException(e.getMessage());
-          }
-        }
+        ret = tbl.getSd().getCols();
       } finally {
         endFunction("get_fields", ret != null);
       }
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 86c7205..e872cdc 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -45,6 +45,8 @@ import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -795,6 +797,11 @@ public class MetaStoreUtils {
    */
   public static List<FieldSchema> getFieldsFromDeserializer(String tableName,
       Deserializer deserializer) throws SerDeException, MetaException {
+    try {
+      return Hive.get().getTable(tableName).getTTable().getSd().getCols();
+    } catch (HiveException e) {
+      // can't get the schema that way? do things the old way, then.
+    }
     ObjectInspector oi = deserializer.getObjectInspector();
     String[] names = tableName.split("\\.");
     String last_name = names[names.length - 1];
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 6b432ac..9e543d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -481,18 +481,7 @@ public class Table implements Serializable {
   }
 
   public List<FieldSchema> getCols() {
-    boolean getColsFromSerDe = SerDeUtils.shouldGetColsFromSerDe(
-      getSerializationLib());
-    if (!getColsFromSerDe) {
-      return tTable.getSd().getCols();
-    } else {
-      try {
-        return Hive.getFieldsFromDeserializer(getTableName(), getDeserializer());
-      } catch (HiveException e) {
-        LOG.error("Unable to get field from serde: " + getSerializationLib(), e);
-      }
-      return new ArrayList<FieldSchema>();
-    }
+    return tTable.getSd().getCols();
   }
 
   /**

Reply via email to