HIVE-5545 : HCatRecord getInteger method returns String when used on Partition columns of type INT (Sushanth Sowmyan, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3f72f818 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3f72f818 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3f72f818 Branch: refs/heads/llap Commit: 3f72f8187e27e1332fa980bcf7bce6a32b6f2229 Parents: 9803344 Author: Sushanth Sowmyan <[email protected]> Authored: Tue May 5 02:35:31 2015 -0700 Committer: Sushanth Sowmyan <[email protected]> Committed: Tue May 5 02:38:03 2015 -0700 ---------------------------------------------------------------------- .../hcatalog/mapreduce/HCatBaseInputFormat.java | 30 ++++++++++++++---- .../hcatalog/mapreduce/HCatRecordReader.java | 4 +-- .../hcatalog/mapreduce/HCatMapReduceTest.java | 1 + .../hcatalog/mapreduce/TestHCatPartitioned.java | 32 +++++++++----------- 4 files changed, 42 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3f72f818/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java ---------------------------------------------------------------------- diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java index 33807f5..55b97dd 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java @@ -30,6 +30,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; @@ -43,6 +47,7 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.common.HCatUtil; import org.apache.hive.hcatalog.data.HCatRecord; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; @@ -195,7 +200,7 @@ public abstract class HCatBaseInputFormat Map<String, String> jobProperties = partitionInfo.getJobProperties(); HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); - Map<String, String> valuesNotInDataCols = getColValsNotInDataColumns( + Map<String, Object> valuesNotInDataCols = getColValsNotInDataColumns( getOutputSchema(conf), partitionInfo ); @@ -206,17 +211,30 @@ public abstract class HCatBaseInputFormat /** * gets values for fields requested by output schema which will not be in the data */ - private static Map<String, String> getColValsNotInDataColumns(HCatSchema outputSchema, - PartInfo partInfo) { + private static Map<String, Object> getColValsNotInDataColumns(HCatSchema outputSchema, + PartInfo partInfo) throws HCatException { HCatSchema dataSchema = partInfo.getPartitionSchema(); - Map<String, String> vals = new HashMap<String, String>(); + Map<String, Object> vals = new HashMap<String, Object>(); for (String fieldName : outputSchema.getFieldNames()) { if (dataSchema.getPosition(fieldName) == null) { // this entry of output is not present in the output schema // so, we first check the table schema to see if it is a part col - if (partInfo.getPartitionValues().containsKey(fieldName)) { - vals.put(fieldName, partInfo.getPartitionValues().get(fieldName)); + + // First, get the appropriate field schema for this field + HCatFieldSchema fschema = outputSchema.get(fieldName); + + // For a partition key type, this will be a primitive typeinfo. + // Obtain relevant object inspector for this typeinfo + ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(fschema.getTypeInfo()); + + // get appropriate object from the string representation of the value in partInfo.getPartitionValues() + // Essentially, partition values are represented as strings, but we want the actual object type associated + Object objVal = ObjectInspectorConverters + .getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi) + .convert(partInfo.getPartitionValues().get(fieldName)); + + vals.put(fieldName, objVal); } else { vals.put(fieldName, null); } http://git-wip-us.apache.org/repos/asf/hive/blob/3f72f818/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java ---------------------------------------------------------------------- diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java index 3ee6157..7aecb0f 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java @@ -63,7 +63,7 @@ class HCatRecordReader extends RecordReader<WritableComparable, HCatRecord> { private Deserializer deserializer; - private Map<String, String> valuesNotInDataCols; + private Map<String, Object> valuesNotInDataCols; private HCatSchema outputSchema = null; private HCatSchema dataSchema = null; @@ -72,7 +72,7 @@ class HCatRecordReader extends RecordReader<WritableComparable, HCatRecord> { * Instantiates a new hcat record reader. */ public HCatRecordReader(HiveStorageHandler storageHandler, - Map<String, String> valuesNotInDataCols) { + Map<String, Object> valuesNotInDataCols) { this.storageHandler = storageHandler; this.valuesNotInDataCols = valuesNotInDataCols; } http://git-wip-us.apache.org/repos/asf/hive/blob/3f72f818/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java ---------------------------------------------------------------------- diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java index ee57f3f..c98d947 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java @@ -381,6 +381,7 @@ public abstract class HCatMapReduceTest extends HCatBaseTest { readRecords.clear(); Configuration conf = new Configuration(); + conf.set(HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname,"true"); Job job = new Job(conf, "hcat mapreduce read test"); job.setJarByClass(this.getClass()); job.setMapperClass(HCatMapReduceTest.MapRead.class); http://git-wip-us.apache.org/repos/asf/hive/blob/3f72f818/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java ---------------------------------------------------------------------- diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java index a386415..d31f29c 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java @@ -35,7 +35,6 @@ import org.apache.hive.hcatalog.data.HCatRecord; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; import org.apache.hive.hcatalog.data.schema.HCatSchema; import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; -import org.junit.BeforeClass; import org.junit.Test; import static junit.framework.Assert.assertEquals; @@ -73,7 +72,7 @@ public class TestHCatPartitioned extends HCatMapReduceTest { List<FieldSchema> fields = new ArrayList<FieldSchema>(); //Defining partition names in unsorted order fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); - fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); + fields.add(new FieldSchema("part0", serdeConstants.INT_TYPE_NAME, "")); return fields; } @@ -85,19 +84,18 @@ public class TestHCatPartitioned extends HCatMapReduceTest { return fields; } - @Test public void testHCatPartitionedTable() throws Exception { Map<String, String> partitionMap = new HashMap<String, String>(); partitionMap.put("part1", "p1value1"); - partitionMap.put("part0", "p0value1"); + partitionMap.put("part0", "501"); runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); partitionMap.clear(); partitionMap.put("PART1", "p1value2"); - partitionMap.put("PART0", "p0value2"); + partitionMap.put("PART0", "502"); runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); @@ -121,7 +119,7 @@ public class TestHCatPartitioned extends HCatMapReduceTest { exc = null; partitionMap.clear(); partitionMap.put("px1", "p1value2"); - partitionMap.put("px0", "p0value2"); + partitionMap.put("px0", "502"); try { Job j = runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); @@ -136,7 +134,7 @@ public class TestHCatPartitioned extends HCatMapReduceTest { //Test for publish with missing partition key values exc = null; partitionMap.clear(); - partitionMap.put("px", "p1value2"); + partitionMap.put("px", "512"); try { runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); @@ -171,17 +169,17 @@ public class TestHCatPartitioned extends HCatMapReduceTest { //Read with partition filter runMRRead(10, "part1 = \"p1value1\""); - runMRRead(10, "part0 = \"p0value1\""); + runMRRead(10, "part0 = \"501\""); if (isTableImmutable()){ runMRRead(20, "part1 = \"p1value2\""); runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\""); - runMRRead(20, "part0 = \"p0value2\""); - runMRRead(30, "part0 = \"p0value1\" or part0 = \"p0value2\""); + runMRRead(20, "part0 = \"502\""); + runMRRead(30, "part0 = \"501\" or part0 = \"502\""); } else { runMRRead(40, "part1 = \"p1value2\""); runMRRead(50, "part1 = \"p1value1\" or part1 = \"p1value2\""); - runMRRead(40, "part0 = \"p0value2\""); - runMRRead(50, "part0 = \"p0value1\" or part0 = \"p0value2\""); + runMRRead(40, "part0 = \"502\""); + runMRRead(50, "part0 = \"501\" or part0 = \"502\""); } tableSchemaTest(); @@ -214,7 +212,7 @@ public class TestHCatPartitioned extends HCatMapReduceTest { Map<String, String> partitionMap = new HashMap<String, String>(); partitionMap.put("part1", "p1value5"); - partitionMap.put("part0", "p0value5"); + partitionMap.put("part0", "505"); runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); @@ -231,7 +229,7 @@ public class TestHCatPartitioned extends HCatMapReduceTest { //Test that changing column data type fails partitionMap.clear(); partitionMap.put("part1", "p1value6"); - partitionMap.put("part0", "p0value6"); + partitionMap.put("part0", "506"); partitionColumns = new ArrayList<HCatFieldSchema>(); partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); @@ -276,7 +274,7 @@ public class TestHCatPartitioned extends HCatMapReduceTest { List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\""); assertEquals(20, records.size()); - records = runMRRead(20, "part0 = \"p0value6\""); + records = runMRRead(20, "part0 = \"506\""); assertEquals(20, records.size()); Integer i = 0; for (HCatRecord rec : records) { @@ -285,7 +283,7 @@ public class TestHCatPartitioned extends HCatMapReduceTest { assertEquals(rec.get(1), "c2value" + i); assertEquals(rec.get(2), "c3value" + i); assertEquals(rec.get(3), "p1value6"); - assertEquals(rec.get(4), "p0value6"); + assertEquals(rec.get(4), 506); i++; } } @@ -317,7 +315,7 @@ public class TestHCatPartitioned extends HCatMapReduceTest { Map<String, String> partitionMap = new HashMap<String, String>(); partitionMap.put("part1", "p1value8"); - partitionMap.put("part0", "p0value8"); + partitionMap.put("part0", "508"); Exception exc = null; try {
