Repository: hive Updated Branches: refs/heads/master aeb837727 -> 51067945c
HIVE-17274: RowContainer spills for timestamp column throws exception (Prasanth Jayachandran reviewed by Matt McCline) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/51067945 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/51067945 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/51067945 Branch: refs/heads/master Commit: 51067945cc1bcd4deeb34ca93c9f73efed704d4a Parents: aeb8377 Author: Prasanth Jayachandran <prasan...@apache.org> Authored: Fri Aug 11 15:37:56 2017 -0700 Committer: Prasanth Jayachandran <prasan...@apache.org> Committed: Fri Aug 11 15:37:56 2017 -0700 ---------------------------------------------------------------------- .../hive/ql/exec/persistence/RowContainer.java | 10 ++- .../ql/exec/persistence/TestRowContainer.java | 75 ++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/51067945/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java index c8a1a0d..d058851 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.slf4j.Logger; @@ -520,7 +521,10 @@ public class RowContainer<ROW extends List<Object>> String suffix = ".tmp"; if (this.keyObject != null) { - suffix = "." + this.keyObject.toString() + suffix; + String keyObjectStr = this.keyObject.toString(); + String md5Str = DigestUtils.md5Hex(keyObjectStr.toString()); + LOG.info("Using md5Str: " + md5Str + " for keyObject: " + keyObjectStr); + suffix = "." + md5Str + suffix; } parentDir = FileUtils.createLocalDirsTempFile(spillFileDirs, "hive-rowcontainer", "", true); @@ -610,4 +614,8 @@ public class RowContainer<ROW extends List<Object>> protected int getLastActualSplit() { return actualSplitNum - 1; } + + public int getNumFlushedBlocks() { + return numFlushedBlocks; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/51067945/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestRowContainer.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestRowContainer.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestRowContainer.java new file mode 100644 index 0000000..76ab315 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestRowContainer.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.persistence; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.junit.Test; + +import com.google.common.collect.Lists; + +public class TestRowContainer { + + @Test + public void testSpillTimestamp() throws HiveException, SerDeException, IOException { + int blockSize = 10; + Configuration cfg = new Configuration(); + RowContainer result = new RowContainer(blockSize, cfg, null); + LazyBinarySerDe serde = new LazyBinarySerDe(); + Properties props = new Properties(); + props.put(serdeConstants.LIST_COLUMNS, "x"); + props.put(serdeConstants.LIST_COLUMN_TYPES, "array<string>"); + SerDeUtils.initializeSerDe(serde, null, props, null); + result.setSerDe(serde, + ObjectInspectorUtils.getStandardObjectInspector(serde.getObjectInspector())); + result.setTableDesc( + PTFRowContainer.createTableDesc((StructObjectInspector) serde.getObjectInspector())); + TimestampWritable key = new TimestampWritable(new Timestamp(10)); + result.setKeyObject(Lists.newArrayList(key)); + List<Writable> row; + // will trigger 2 spills + for (int i = 0; i <= blockSize * 2; i++) { + row = new ArrayList<Writable>(); + row.add(new Text("" + i)); + result.addRow(row); + } + assertEquals(2, result.getNumFlushedBlocks()); + result.setKeyObject(null); + assertEquals(Lists.newArrayList(0).toString(), result.first().get(0).toString()); + for (int i = 1; i < result.rowCount() - 1; i++) { + assertEquals(Lists.newArrayList(i).toString(), result.next().get(0).toString()); + } + result.close(); + } +}