HIVE-18209: Fix API call in VectorizedListColumnReader to get value from BytesColumnVector (Colin Ma, reviewed by Ferdinand Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/11227eba Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/11227eba Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/11227eba Branch: refs/heads/standalone-metastore Commit: 11227ebab390df10970fb8ef61f3e24421d6c66e Parents: 7acc4ce Author: Ferdinand Xu <[email protected]> Authored: Mon Dec 18 10:01:13 2017 +0800 Committer: Ferdinand Xu <[email protected]> Committed: Mon Dec 18 10:01:13 2017 +0800 ---------------------------------------------------------------------- .../vector/VectorizedListColumnReader.java | 3 +- .../parquet/TestVectorizedListColumnReader.java | 34 +++++++++++++++++++- .../parquet/VectorizedColumnReaderTestBase.java | 1 + 3 files changed, 36 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/11227eba/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java index ea4f2f2..12af77c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java @@ -258,7 +258,8 @@ public class VectorizedListColumnReader extends BaseVectorizedColumnReader { lcv.child = new BytesColumnVector(total); lcv.child.init(); for (int i = 0; i < valueList.size(); i++) { - ((BytesColumnVector)lcv.child).setVal(i, ((List<byte[]>)valueList).get(i)); + byte[] src = ((List<byte[]>)valueList).get(i); + ((BytesColumnVector)lcv.child).setRef(i, src, 0, src.length); } break; case FLOAT: http://git-wip-us.apache.org/repos/asf/hive/blob/11227eba/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java index de19615..8ea5d25 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java @@ -72,8 +72,9 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa } } for (int j = 0; j < listMaxSize; j++) { - group.append("list_int32_field_for_repeat_test", getIntValue(isDictionaryEncoding, j)); + group.append("list_binary_field_for_repeat_test", getBinaryValue(isDictionaryEncoding, i)); } + writer.write(group); } writer.close(); @@ -157,6 +158,14 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa removeFile(); } + @Test + public void testUnrepeatedStringWithoutNullListRead() throws Exception { + removeFile(); + writeListData(initWriterFromFile(), false, 1025); + testUnRepeateStringWithoutNullListRead(); + removeFile(); + } + private void testListReadAllType(boolean isDictionaryEncoding, int elementNum) throws Exception { testListRead(isDictionaryEncoding, "int", elementNum); testListRead(isDictionaryEncoding, "long", elementNum); @@ -250,6 +259,10 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa try { while (reader.next(NullWritable.get(), previous)) { ListColumnVector vector = (ListColumnVector) previous.cols[0]; + + //since Repeating only happens when offset length is 1. + assertEquals((vector.offsets.length == 1),vector.isRepeating); + for (int i = 0; i < vector.offsets.length; i++) { if (row == elementNum) { assertEquals(i, vector.offsets.length - 1); @@ -305,4 +318,23 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa reader.close(); } } + + private void testUnRepeateStringWithoutNullListRead() throws Exception { + Configuration conf = new Configuration(); + conf.set(IOConstants.COLUMNS, "list_binary_field_for_repeat_test"); + conf.set(IOConstants.COLUMNS_TYPES, "array<string>"); + conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = createTestParquetReader( + "message hive_schema {repeated binary list_binary_field_for_repeat_test;}", conf); + VectorizedRowBatch previous = reader.createValue(); + try { + while (reader.next(NullWritable.get(), previous)) { + ListColumnVector vector = (ListColumnVector) previous.cols[0]; + assertEquals((vector.offsets.length == 1),vector.isRepeating); + } + } finally { + reader.close(); + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/11227eba/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java index 33c5c82..db7777d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java @@ -124,6 +124,7 @@ public class VectorizedColumnReaderTestBase { + "repeated fixed_len_byte_array(3) list_byte_array_field;" + "repeated binary list_binary_field;" + "repeated binary list_decimal_field (DECIMAL(5,2));" + + "repeated binary list_binary_field_for_repeat_test;" + "repeated int32 list_int32_field_for_repeat_test;" + "repeated group map_int32 (MAP_KEY_VALUE) {\n" + " required int32 key;\n"
