[07/53] [abbrv] hive git commit: HIVE-11307. Remove getWritableObject from ColumnVectorBatch. (omalley reviewed by prasanthj)

sershe Wed, 05 Aug 2015 17:50:55 -0700

HIVE-11307. Remove getWritableObject from ColumnVectorBatch. (omalley
reviewed by prasanthj)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0ead9fe6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0ead9fe6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0ead9fe6

Branch: refs/heads/llap
Commit: 0ead9fe6895d582ccbd289a4576b34f7d2ca15b8
Parents: 29651cd
Author: Owen O'Malley <[email protected]>
Authored: Tue Jul 28 12:57:39 2015 -0700
Committer: Owen O'Malley <[email protected]>
Committed: Tue Jul 28 12:57:39 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/exec/vector/BytesColumnVector.java  | 27 +------
 .../hive/ql/exec/vector/ColumnVector.java       |  4 --
 .../ql/exec/vector/DecimalColumnVector.java     | 23 +-----
 .../hive/ql/exec/vector/DoubleColumnVector.java | 18 -----
 .../hive/ql/exec/vector/LongColumnVector.java   | 24 ++-----
 .../exec/vector/TestVectorizedRowBatchCtx.java  |  6 +-
 .../hive/ql/io/orc/TestInputOutputFormat.java   |  5 +-
 .../hive/ql/io/orc/TestVectorizedORCReader.java | 75 ++++++++++++++------
 8 files changed, 68 insertions(+), 114 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index c9a0fa2..02c52fa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -18,12 +18,6 @@
 
 package org.apache.hadoop.hive.ql.exec.vector;
 
-import java.util.Arrays;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
 /**
  * This class supports string and binary data by value reference -- i.e. each field is
  * explicitly present, as opposed to provided by a dictionary reference.
@@ -51,9 +45,6 @@ public class BytesColumnVector extends ColumnVector {
   private byte[] buffer;   // optional buffer to use when actually copying in data
   private int nextFree;    // next free position in buffer
 
-  // Reusable text object
-  private final Text textObject = new Text();
-
   // Estimate that there will be 16 bytes per entry
   static final int DEFAULT_BUFFER_SIZE = 16 * VectorizedRowBatch.DEFAULT_SIZE;
 
@@ -215,22 +206,6 @@ public class BytesColumnVector extends ColumnVector {
     buffer = newBuffer;
   }
 
-  @Override
-  public Writable getWritableObject(int index) {
-    if (this.isRepeating) {
-      index = 0;
-    }
-    Writable result = null;
-    if (!isNull[index] && vector[index] != null) {
-      textObject.clear();
-      textObject.append(vector[index], start[index], length[index]);
-      result = textObject;
-    } else {
-      result = NullWritable.get();
-    }
-    return result;
-  }
-
   /** Copy the current object contents into the output. Only copy selected entries,
     * as indicated by selectedInUse and the sel array.
     */
@@ -294,7 +269,7 @@ public class BytesColumnVector extends ColumnVector {
 
       // Only copy data values if entry is not null. The string value
       // at position 0 is undefined if the position 0 value is null.
-      if (noNulls || (!noNulls && !isNull[0])) {
+      if (noNulls || !isNull[0]) {
 
         // loops start at position 1 because position 0 is already set
         if (selectedInUse) {

http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index 49d4c12..4b5cf39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -20,8 +20,6 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.util.Arrays;
 
-import org.apache.hadoop.io.Writable;
-
 /**
  * ColumnVector contains the shared structure for the sub-types,
  * including NULL information, and whether this vector
@@ -64,8 +62,6 @@ public abstract class ColumnVector {
   private boolean preFlattenIsRepeating;
   private boolean preFlattenNoNulls;
 
-  public abstract Writable getWritableObject(int index);
-
   /**
    * Constructor for super-class ColumnVector. This is not called directly,
    * but used to initialize inherited fields.

http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index 0f63b29..74a9d5f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -22,9 +22,6 @@ import java.math.BigInteger;
 
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
 
 public class DecimalColumnVector extends ColumnVector {
 
@@ -39,8 +36,6 @@ public class DecimalColumnVector extends ColumnVector {
   public short scale;
   public short precision;
 
-  private final HiveDecimalWritable writableObj = new HiveDecimalWritable();
-
   public DecimalColumnVector(int precision, int scale) {
     this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
   }
@@ -49,27 +44,13 @@ public class DecimalColumnVector extends ColumnVector {
     super(size);
     this.precision = (short) precision;
     this.scale = (short) scale;
-    final int len = size;
-    vector = new HiveDecimalWritable[len];
-    for (int i = 0; i < len; i++) {
+    vector = new HiveDecimalWritable[size];
+    for (int i = 0; i < size; i++) {
       vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
     }
   }
 
   @Override
-  public Writable getWritableObject(int index) {
-    if (isRepeating) {
-      index = 0;
-    }
-    if (!noNulls && isNull[index]) {
-      return NullWritable.get();
-    } else {
-      writableObj.set(vector[index]);
-      return writableObj;
-    }
-  }
-
-  @Override
   public void flatten(boolean selectedInUse, int[] sel, int size) {
     // TODO Auto-generated method stub
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
index 013a9f5..4a7811d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -19,10 +19,6 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.util.Arrays;
 
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-
 /**
  * This class represents a nullable double precision floating point column vector.
  * This class will be used for operations on all floating point types (float, double)
@@ -36,7 +32,6 @@ import org.apache.hadoop.io.Writable;
  */
 public class DoubleColumnVector extends ColumnVector {
   public double[] vector;
-  private final DoubleWritable writableObj = new DoubleWritable();
   public static final double NULL_VALUE = Double.NaN;
 
   /**
@@ -57,19 +52,6 @@ public class DoubleColumnVector extends ColumnVector {
     vector = new double[len];
   }
 
-  @Override
-  public Writable getWritableObject(int index) {
-    if (this.isRepeating) {
-      index = 0;
-    }
-    if (!noNulls && isNull[index]) {
-      return NullWritable.get();
-    } else {
-      writableObj.set(vector[index]);
-      return writableObj;
-    }
-  }
-
   // Copy the current object contents into the output. Only copy selected entries,
   // as indicated by selectedInUse and the sel array.
   public void copySelected(

http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
index d900cc6..5702584 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -19,10 +19,6 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.util.Arrays;
 
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-
 /**
  * This class represents a nullable int column vector.
  * This class will be used for operations on all integer types (tinyint, smallint, int, bigint)
@@ -36,7 +32,6 @@ import org.apache.hadoop.io.Writable;
  */
 public class LongColumnVector extends ColumnVector {
   public long[] vector;
-  private final LongWritable writableObj = new LongWritable();
   public static final long NULL_VALUE = 1;
 
   /**
@@ -50,26 +45,13 @@ public class LongColumnVector extends ColumnVector {
   /**
    * Don't use this except for testing purposes.
    *
-   * @param len
+   * @param len the number of rows
    */
   public LongColumnVector(int len) {
     super(len);
     vector = new long[len];
   }
 
-  @Override
-  public Writable getWritableObject(int index) {
-    if (this.isRepeating) {
-      index = 0;
-    }
-    if (!noNulls && isNull[index]) {
-      return NullWritable.get();
-    } else {
-      writableObj.set(vector[index]);
-      return writableObj;
-    }
-  }
-
   // Copy the current object contents into the output. Only copy selected entries,
   // as indicated by selectedInUse and the sel array.
   public void copySelected(
@@ -141,7 +123,9 @@ public class LongColumnVector extends ColumnVector {
       }
     }
     else {
-      System.arraycopy(vector, 0, output.vector, 0, size);
+      for(int i = 0; i < size; ++i) {
+        output.vector[i] = vector[i];
+      }
     }
 
     // Copy nulls over if needed

http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
index 473ebac..3321823 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
@@ -289,7 +289,8 @@ public class TestVectorizedRowBatchCtx {
           case BINARY: {
             BytesColumnVector bcv = (BytesColumnVector) batch.cols[j];
               BytesWritable colBinary = (BytesWritable) writableCol;
-              BytesWritable batchBinary = (BytesWritable) bcv.getWritableObject(i);
+              BytesWritable batchBinary = new BytesWritable();
+              batchBinary.set(bcv.vector[i], bcv.start[i], bcv.length[i]);
               byte[] a = colBinary.getBytes();
               byte[] b = batchBinary.getBytes();
               Assert.assertEquals(true, Arrays.equals(a, b));
@@ -298,7 +299,8 @@ public class TestVectorizedRowBatchCtx {
           case STRING: {
             BytesColumnVector bcv = (BytesColumnVector) batch.cols[j];
             Text colText = (Text) writableCol;
-            Text batchText = (Text) bcv.getWritableObject(i);
+            Text batchText = new Text();
+            batchText.set(bcv.vector[i], bcv.start[i], bcv.length[i]);
             String a = colText.toString();
             String b = batchText.toString();
             Assert.assertEquals(true, a.equals(b));

http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index e40e1d2..46deda5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1542,8 +1542,11 @@ public class TestInputOutputFormat {
       assertEquals("checking long " + i, i, longColumn.vector[i]);
       assertEquals("checking float " + i, i, floatColumn.vector[i], 0.0001);
       assertEquals("checking double " + i, i, doubleCoulmn.vector[i], 0.0001);
+      Text strValue = new Text();
+      strValue.set(stringColumn.vector[i], stringColumn.start[i],
+          stringColumn.length[i]);
       assertEquals("checking string " + i, new Text(Long.toHexString(i)),
-          stringColumn.getWritableObject(i));
+          strValue);
       assertEquals("checking decimal " + i, HiveDecimal.create(i),
           decimalColumn.vector[i].getHiveDecimal());
       assertEquals("checking date " + i, i, dateColumn.vector[i]);

http://git-wip-us.apache.org/repos/asf/hive/blob/0ead9fe6/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index e72e5cf..c739aef 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -30,18 +30,30 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
 import org.junit.Before;
 import org.junit.Test;
 
+import static org.junit.Assert.assertEquals;
+
 /**
 *
 * Class that tests ORC reader vectorization by comparing records that are
@@ -149,42 +161,61 @@ public class TestVectorizedORCReader {
         row = (OrcStruct) rr.next(row);
         for (int j = 0; j < batch.cols.length; j++) {
           Object a = (row.getFieldValue(j));
-          Object b = batch.cols[j].getWritableObject(i);
-          // Boolean values are stores a 1's and 0's, so convert and compare
-          if (a instanceof BooleanWritable) {
+          ColumnVector cv = batch.cols[j];
+          // if the value is repeating, use row 0
+          int rowId = cv.isRepeating ? 0 : i;
+
+          // make sure the null flag agrees
+          if (a == null) {
+            Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
+          } else if (a instanceof BooleanWritable) {
+
+            // Boolean values are stored as 1's and 0's, so convert and compare
             Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
-            Assert.assertEquals(true, temp.toString().equals(b.toString()));
-            continue;
-          }
-          // Timestamps are stored as long, so convert and compare
-          if (a instanceof TimestampWritable) {
+            long b = ((LongColumnVector) cv).vector[rowId];
+            Assert.assertEquals(temp.toString(), Long.toString(b));
+          } else if (a instanceof TimestampWritable) {
+            // Timestamps are stored as long, so convert and compare
             TimestampWritable t = ((TimestampWritable) a);
             // Timestamp.getTime() is overriden and is 
             // long time = super.getTime();
             // return (time + (nanos / 1000000));
             Long timeInNanoSec = (t.getTimestamp().getTime() * 1000000)
                 + (t.getTimestamp().getNanos() % 1000000);
-            Assert.assertEquals(true, timeInNanoSec.toString().equals(b.toString()));
-            continue;
-          }
+            long b = ((LongColumnVector) cv).vector[rowId];
+            Assert.assertEquals(timeInNanoSec.toString(), Long.toString(b));
+
+          } else if (a instanceof DateWritable) {
+            // Dates are stored as long, so convert and compare
 
-          // Dates are stored as long, so convert and compare
-          if (a instanceof DateWritable) {
             DateWritable adt = (DateWritable) a;
-            Assert.assertEquals(adt.get().getTime(), DateWritable.daysToMillis((int) ((LongWritable) b).get()));
-            continue;
-          }
+            long b = ((LongColumnVector) cv).vector[rowId];
+            Assert.assertEquals(adt.get().getTime(),
+                DateWritable.daysToMillis((int) b));
 
-          // Decimals are stored as BigInteger, so convert and compare
-          if (a instanceof HiveDecimalWritable) {
+          } else if (a instanceof HiveDecimalWritable) {
+            // Decimals are stored as BigInteger, so convert and compare
             HiveDecimalWritable dec = (HiveDecimalWritable) a;
+            HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[i];
             Assert.assertEquals(dec, b);
-          }
 
-          if (null == a) {
-            Assert.assertEquals(true, (b == null || (b instanceof NullWritable)));
+          } else if (a instanceof DoubleWritable) {
+
+            double b = ((DoubleColumnVector) cv).vector[rowId];
+            assertEquals(a.toString(), Double.toString(b));
+          } else if (a instanceof Text) {
+            BytesColumnVector bcv = (BytesColumnVector) cv;
+            Text b = new Text();
+            b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
+            assertEquals(a, b);
+          } else if (a instanceof IntWritable ||
+              a instanceof LongWritable ||
+              a instanceof ByteWritable ||
+              a instanceof ShortWritable) {
+            assertEquals(a.toString(),
+                Long.toString(((LongColumnVector) cv).vector[rowId]));
           } else {
-            Assert.assertEquals(true, b.toString().equals(a.toString()));
+            assertEquals("huh", a.getClass().getName());
           }
         }
       }

[07/53] [abbrv] hive git commit: HIVE-11307. Remove getWritableObject from ColumnVectorBatch. (omalley reviewed by prasanthj)

Reply via email to