Author: gates
Date: Mon Jul 30 19:01:12 2012
New Revision: 1367226
URL: http://svn.apache.org/viewvc?rev=1367226&view=rev
Log:
HCATALOG-436 JSON SerDe column misnaming on CTAS
Modified:
incubator/hcatalog/branches/branch-0.4/CHANGES.txt
incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java
incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java
Modified: incubator/hcatalog/branches/branch-0.4/CHANGES.txt
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/CHANGES.txt?rev=1367226&r1=1367225&r2=1367226&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/CHANGES.txt (original)
+++ incubator/hcatalog/branches/branch-0.4/CHANGES.txt Mon Jul 30 19:01:12 2012
@@ -51,6 +51,8 @@ Trunk (unreleased changes)
OPTIMIZATIONS
BUG FIXES
+ HCAT-436 JSON SerDe column misnaming on CTAS (khorgath via gates)
+
HCAT-449 HCatLoader is mistakenly identifying Configuration parameters to
store (cdrome via traviscrawford)
HCAT-452 HCat_Drop_Table_3 does not get initialized properly (cdrome via
toffer)
Modified:
incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java?rev=1367226&r1=1367225&r2=1367226&view=diff
==============================================================================
---
incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java
(original)
+++
incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java
Mon Jul 30 19:01:12 2012
@@ -26,8 +26,11 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -167,10 +170,42 @@ public class JsonSerDe implements SerDe
throw new IOException("Field name expected");
}
String fieldName = p.getText();
- int fpos = s.getPosition(fieldName);
+ int fpos;
+ try {
+ fpos = s.getPosition(fieldName);
+ } catch (NullPointerException npe){
+ fpos = getPositionFromHiveInternalColumnName(fieldName);
+ LOG.debug("NPE finding position for field [{}] in schema
[{}]",fieldName,s);
+ if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))){
+ LOG.error("Hive internal column name {} and position "
+ +"encoding {} for the column name are at odds",fieldName,fpos);
+ throw npe;
+ }
+ if (fpos == -1){
+ return; // unknown field, we return.
+ }
+ }
HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
+ Object currField = extractCurrentField(p, null, hcatFieldSchema,false);
+ r.set(fpos,currField);
+ }
- r.set(fpos,extractCurrentField(p, null, hcatFieldSchema,false));
+ public String getHiveInternalColumnName(int fpos) {
+ return HiveConf.getColumnInternalName(fpos);
+ }
+
+ public int getPositionFromHiveInternalColumnName(String internalName) {
+// return HiveConf.getPositionFromInternalName(fieldName);
+ // The above line should have been all the implementation that
+ // we need, but due to a bug in that impl which recognizes
+ // only single-digit columns, we need another impl here.
+ Pattern internalPattern = Pattern.compile("_col([0-9]+)");
+ Matcher m = internalPattern.matcher(internalName);
+ if (!m.matches()){
+ return -1;
+ } else {
+ return Integer.parseInt(m.group(1));
+ }
}
/**
Modified:
incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java?rev=1367226&r1=1367225&r2=1367226&view=diff
==============================================================================
---
incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java
(original)
+++
incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java
Mon Jul 30 19:01:12 2012
@@ -26,6 +26,7 @@ import java.util.Properties;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.io.Writable;
import org.slf4j.Logger;
@@ -146,4 +147,67 @@ public class TestJsonSerDe extends TestC
}
+ public void testRobustRead() throws Exception {
+ /**
+ * This test has been added to account for HCATALOG-436
+ * We write out columns with "internal column names" such
+ * as "_col0", but try to read with regular column names.
+ */
+
+ Configuration conf = new Configuration();
+
+ for (Pair<Properties,HCatRecord> e : getData()){
+ Properties tblProps = e.first;
+ HCatRecord r = e.second;
+
+ Properties internalTblProps = new Properties();
+ for (Map.Entry pe : tblProps.entrySet()){
+ if (!pe.getKey().equals(Constants.LIST_COLUMNS)){
+ internalTblProps.put(pe.getKey(), pe.getValue());
+ } else {
+ internalTblProps.put(pe.getKey(),getInternalNames((String)
pe.getValue()));
+ }
+ }
+
+ LOG.info("orig tbl props:{}",tblProps);
+ LOG.info("modif tbl props:{}",internalTblProps);
+
+ JsonSerDe wjsd = new JsonSerDe();
+ wjsd.initialize(conf, internalTblProps);
+
+ JsonSerDe rjsd = new JsonSerDe();
+ rjsd.initialize(conf, tblProps);
+
+ LOG.info("ORIG:{}",r);
+
+ Writable s = wjsd.serialize(r,wjsd.getObjectInspector());
+ LOG.info("ONE:{}",s);
+
+ Object o1 = wjsd.deserialize(s);
+ LOG.info("deserialized ONE : {} ", o1);
+
+ Object o2 = rjsd.deserialize(s);
+ LOG.info("deserialized TWO : {} ", o2);
+ assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2));
+ }
+
+ }
+
+ String getInternalNames(String columnNames){
+ if (columnNames == null) {
+ return null;
+ }
+ if (columnNames.isEmpty()) {
+ return "";
+ }
+
+ StringBuffer sb = new StringBuffer();
+ int numStrings = columnNames.split(",").length;
+ sb.append("_col0");
+ for (int i = 1; i < numStrings ; i++ ){
+ sb.append(",");
+ sb.append(HiveConf.getColumnInternalName(i));
+ }
+ return sb.toString();
+ }
}