HIVE-11307. Remove getWritableObject from ColumnVectorBatch. (omalley reviewed by prasanthj)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0ead9fe6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0ead9fe6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0ead9fe6 Branch: refs/heads/llap Commit: 0ead9fe6895d582ccbd289a4576b34f7d2ca15b8 Parents: 29651cd Author: Owen O'Malley <[email protected]> Authored: Tue Jul 28 12:57:39 2015 -0700 Committer: Owen O'Malley <[email protected]> Committed: Tue Jul 28 12:57:39 2015 -0700 ---------------------------------------------------------------------- .../hive/ql/exec/vector/BytesColumnVector.java | 27 +------ .../hive/ql/exec/vector/ColumnVector.java | 4 -- .../ql/exec/vector/DecimalColumnVector.java | 23 +----- .../hive/ql/exec/vector/DoubleColumnVector.java | 18 ----- .../hive/ql/exec/vector/LongColumnVector.java | 24 ++----- .../exec/vector/TestVectorizedRowBatchCtx.java | 6 +- .../hive/ql/io/orc/TestInputOutputFormat.java | 5 +- .../hive/ql/io/orc/TestVectorizedORCReader.java | 75 ++++++++++++++------ 8 files changed, 68 insertions(+), 114 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index c9a0fa2..02c52fa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -18,12 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.util.Arrays; - -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; - /** * This class supports string and binary data by value reference -- i.e. each field is * explicitly present, as opposed to provided by a dictionary reference. @@ -51,9 +45,6 @@ public class BytesColumnVector extends ColumnVector { private byte[] buffer; // optional buffer to use when actually copying in data private int nextFree; // next free position in buffer - // Reusable text object - private final Text textObject = new Text(); - // Estimate that there will be 16 bytes per entry static final int DEFAULT_BUFFER_SIZE = 16 * VectorizedRowBatch.DEFAULT_SIZE; @@ -215,22 +206,6 @@ public class BytesColumnVector extends ColumnVector { buffer = newBuffer; } - @Override - public Writable getWritableObject(int index) { - if (this.isRepeating) { - index = 0; - } - Writable result = null; - if (!isNull[index] && vector[index] != null) { - textObject.clear(); - textObject.append(vector[index], start[index], length[index]); - result = textObject; - } else { - result = NullWritable.get(); - } - return result; - } - /** Copy the current object contents into the output. Only copy selected entries, * as indicated by selectedInUse and the sel array. */ @@ -294,7 +269,7 @@ public class BytesColumnVector extends ColumnVector { // Only copy data values if entry is not null. The string value // at position 0 is undefined if the position 0 value is null. - if (noNulls || (!noNulls && !isNull[0])) { + if (noNulls || !isNull[0]) { // loops start at position 1 because position 0 is already set if (selectedInUse) { http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index 49d4c12..4b5cf39 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -20,8 +20,6 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.Arrays; -import org.apache.hadoop.io.Writable; - /** * ColumnVector contains the shared structure for the sub-types, * including NULL information, and whether this vector @@ -64,8 +62,6 @@ public abstract class ColumnVector { private boolean preFlattenIsRepeating; private boolean preFlattenNoNulls; - public abstract Writable getWritableObject(int index); - /** * Constructor for super-class ColumnVector. This is not called directly, * but used to initialize inherited fields. http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java index 0f63b29..74a9d5f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -22,9 +22,6 @@ import java.math.BigInteger; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; public class DecimalColumnVector extends ColumnVector { @@ -39,8 +36,6 @@ public class DecimalColumnVector extends ColumnVector { public short scale; public short precision; - private final HiveDecimalWritable writableObj = new HiveDecimalWritable(); - public DecimalColumnVector(int precision, int scale) { this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale); } @@ -49,27 +44,13 @@ public class DecimalColumnVector extends ColumnVector { super(size); this.precision = (short) precision; this.scale = (short) scale; - final int len = size; - vector = new HiveDecimalWritable[len]; - for (int i = 0; i < len; i++) { + vector = new HiveDecimalWritable[size]; + for (int i = 0; i < size; i++) { vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO); } } @Override - public Writable getWritableObject(int index) { - if (isRepeating) { - index = 0; - } - if (!noNulls && isNull[index]) { - return NullWritable.get(); - } else { - writableObj.set(vector[index]); - return writableObj; - } - } - - @Override public void flatten(boolean selectedInUse, int[] sel, int size) { // TODO Auto-generated method stub } http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index 013a9f5..4a7811d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -19,10 +19,6 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.Arrays; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; - /** * This class represents a nullable double precision floating point column vector. * This class will be used for operations on all floating point types (float, double) @@ -36,7 +32,6 @@ import org.apache.hadoop.io.Writable; */ public class DoubleColumnVector extends ColumnVector { public double[] vector; - private final DoubleWritable writableObj = new DoubleWritable(); public static final double NULL_VALUE = Double.NaN; /** @@ -57,19 +52,6 @@ public class DoubleColumnVector extends ColumnVector { vector = new double[len]; } - @Override - public Writable getWritableObject(int index) { - if (this.isRepeating) { - index = 0; - } - if (!noNulls && isNull[index]) { - return NullWritable.get(); - } else { - writableObj.set(vector[index]); - return writableObj; - } - } - // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. public void copySelected( http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index d900cc6..5702584 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -19,10 +19,6 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.Arrays; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; - /** * This class represents a nullable int column vector. * This class will be used for operations on all integer types (tinyint, smallint, int, bigint) @@ -36,7 +32,6 @@ import org.apache.hadoop.io.Writable; */ public class LongColumnVector extends ColumnVector { public long[] vector; - private final LongWritable writableObj = new LongWritable(); public static final long NULL_VALUE = 1; /** @@ -50,26 +45,13 @@ public class LongColumnVector extends ColumnVector { /** * Don't use this except for testing purposes. * - * @param len + * @param len the number of rows */ public LongColumnVector(int len) { super(len); vector = new long[len]; } - @Override - public Writable getWritableObject(int index) { - if (this.isRepeating) { - index = 0; - } - if (!noNulls && isNull[index]) { - return NullWritable.get(); - } else { - writableObj.set(vector[index]); - return writableObj; - } - } - // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. public void copySelected( @@ -141,7 +123,9 @@ public class LongColumnVector extends ColumnVector { } } else { - System.arraycopy(vector, 0, output.vector, 0, size); + for(int i = 0; i < size; ++i) { + output.vector[i] = vector[i]; + } } // Copy nulls over if needed http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java index 473ebac..3321823 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java @@ -289,7 +289,8 @@ public class TestVectorizedRowBatchCtx { case BINARY: { BytesColumnVector bcv = (BytesColumnVector) batch.cols[j]; BytesWritable colBinary = (BytesWritable) writableCol; - BytesWritable batchBinary = (BytesWritable) bcv.getWritableObject(i); + BytesWritable batchBinary = new BytesWritable(); + batchBinary.set(bcv.vector[i], bcv.start[i], bcv.length[i]); byte[] a = colBinary.getBytes(); byte[] b = batchBinary.getBytes(); Assert.assertEquals(true, Arrays.equals(a, b)); @@ -298,7 +299,8 @@ public class TestVectorizedRowBatchCtx { case STRING: { BytesColumnVector bcv = (BytesColumnVector) batch.cols[j]; Text colText = (Text) writableCol; - Text batchText = (Text) bcv.getWritableObject(i); + Text batchText = new Text(); + batchText.set(bcv.vector[i], bcv.start[i], bcv.length[i]); String a = colText.toString(); String b = batchText.toString(); Assert.assertEquals(true, a.equals(b)); http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index e40e1d2..46deda5 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -1542,8 +1542,11 @@ public class TestInputOutputFormat { assertEquals("checking long " + i, i, longColumn.vector[i]); assertEquals("checking float " + i, i, floatColumn.vector[i], 0.0001); assertEquals("checking double " + i, i, doubleCoulmn.vector[i], 0.0001); + Text strValue = new Text(); + strValue.set(stringColumn.vector[i], stringColumn.start[i], + stringColumn.length[i]); assertEquals("checking string " + i, new Text(Long.toHexString(i)), - stringColumn.getWritableObject(i)); + strValue); assertEquals("checking decimal " + i, HiveDecimal.create(i), decimalColumn.vector[i].getHiveDecimal()); assertEquals("checking date " + i, i, dateColumn.vector[i]); http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java index e72e5cf..c739aef 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java @@ -30,18 +30,30 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; import org.junit.Before; import org.junit.Test; +import static org.junit.Assert.assertEquals; + /** * * Class that tests ORC reader vectorization by comparing records that are @@ -149,42 +161,61 @@ public class TestVectorizedORCReader { row = (OrcStruct) rr.next(row); for (int j = 0; j < batch.cols.length; j++) { Object a = (row.getFieldValue(j)); - Object b = batch.cols[j].getWritableObject(i); - // Boolean values are stores a 1's and 0's, so convert and compare - if (a instanceof BooleanWritable) { + ColumnVector cv = batch.cols[j]; + // if the value is repeating, use row 0 + int rowId = cv.isRepeating ? 0 : i; + + // make sure the null flag agrees + if (a == null) { + Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]); + } else if (a instanceof BooleanWritable) { + + // Boolean values are stores a 1's and 0's, so convert and compare Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0); - Assert.assertEquals(true, temp.toString().equals(b.toString())); - continue; - } - // Timestamps are stored as long, so convert and compare - if (a instanceof TimestampWritable) { + long b = ((LongColumnVector) cv).vector[rowId]; + Assert.assertEquals(temp.toString(), Long.toString(b)); + } else if (a instanceof TimestampWritable) { + // Timestamps are stored as long, so convert and compare TimestampWritable t = ((TimestampWritable) a); // Timestamp.getTime() is overriden and is // long time = super.getTime(); // return (time + (nanos / 1000000)); Long timeInNanoSec = (t.getTimestamp().getTime() * 1000000) + (t.getTimestamp().getNanos() % 1000000); - Assert.assertEquals(true, timeInNanoSec.toString().equals(b.toString())); - continue; - } + long b = ((LongColumnVector) cv).vector[rowId]; + Assert.assertEquals(timeInNanoSec.toString(), Long.toString(b)); + + } else if (a instanceof DateWritable) { + // Dates are stored as long, so convert and compare - // Dates are stored as long, so convert and compare - if (a instanceof DateWritable) { DateWritable adt = (DateWritable) a; - Assert.assertEquals(adt.get().getTime(), DateWritable.daysToMillis((int) ((LongWritable) b).get())); - continue; - } + long b = ((LongColumnVector) cv).vector[rowId]; + Assert.assertEquals(adt.get().getTime(), + DateWritable.daysToMillis((int) b)); - // Decimals are stored as BigInteger, so convert and compare - if (a instanceof HiveDecimalWritable) { + } else if (a instanceof HiveDecimalWritable) { + // Decimals are stored as BigInteger, so convert and compare HiveDecimalWritable dec = (HiveDecimalWritable) a; + HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[i]; Assert.assertEquals(dec, b); - } - if (null == a) { - Assert.assertEquals(true, (b == null || (b instanceof NullWritable))); + } else if (a instanceof DoubleWritable) { + + double b = ((DoubleColumnVector) cv).vector[rowId]; + assertEquals(a.toString(), Double.toString(b)); + } else if (a instanceof Text) { + BytesColumnVector bcv = (BytesColumnVector) cv; + Text b = new Text(); + b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]); + assertEquals(a, b); + } else if (a instanceof IntWritable || + a instanceof LongWritable || + a instanceof ByteWritable || + a instanceof ShortWritable) { + assertEquals(a.toString(), + Long.toString(((LongColumnVector) cv).vector[rowId])); } else { - Assert.assertEquals(true, b.toString().equals(a.toString())); + assertEquals("huh", a.getClass().getName()); } } }
