Author: jitendra
Date: Fri Jan 31 19:32:56 2014
New Revision: 1563224
URL: http://svn.apache.org/r1563224
Log:
HIVE-6178. Implement vectorized reader for DECIMAL datatype for ORC format
(jitendra)
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java
hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestUnsignedInt128.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
URL:
http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java?rev=1563224&r1=1563223&r2=1563224&view=diff
==============================================================================
---
hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
(original)
+++
hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
Fri Jan 31 19:32:56 2014
@@ -16,7 +16,7 @@
package org.apache.hadoop.hive.common.type;
import java.math.BigDecimal;
-import java.math.MathContext;
+import java.math.BigInteger;
import java.nio.IntBuffer;
/**
@@ -551,6 +551,25 @@ public final class Decimal128 extends Nu
}
/**
+ * Updates the value of this object with the given {@link BigInteger} and
scale.
+ *
+ * @param bigInt
+ * {@link java.math.BigInteger}
+ * @param scale
+ */
+ public void update(BigInteger bigInt, short scale) {
+ this.scale = scale;
+ this.signum = (byte) bigInt.compareTo(BigInteger.ZERO);
+ if (signum == 0) {
+ update(0);
+ } else if (signum < 0) {
+ unscaledValue.update(bigInt.negate());
+ } else {
+ unscaledValue.update(bigInt);
+ }
+ }
+
+ /**
* Updates the value of this object with the given string.
*
* @param str
@@ -1428,7 +1447,7 @@ public final class Decimal128 extends Nu
* @return {@code true} if and only if the specified {@code Object} is a
* {@code Decimal128} whose value and scale are equal to this
* {@code Decimal128}'s.
- * @see #compareTo(java.math.Decimal128)
+ * @see #compareTo(Decimal128)
* @see #hashCode
*/
@Override
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java
URL:
http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java?rev=1563224&r1=1563223&r2=1563224&view=diff
==============================================================================
---
hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java
(original)
+++
hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java
Fri Jan 31 19:32:56 2014
@@ -18,7 +18,6 @@ package org.apache.hadoop.hive.common.ty
import java.math.BigInteger;
import java.nio.IntBuffer;
import java.util.Arrays;
-import org.apache.hadoop.hive.common.type.SqlMathUtil;
/**
* This code was originally written for Microsoft PolyBase.
@@ -117,11 +116,7 @@ public final class UnsignedInt128 implem
* v3
*/
public UnsignedInt128(int v0, int v1, int v2, int v3) {
- this.v[0] = v0;
- this.v[1] = v1;
- this.v[2] = v2;
- this.v[3] = v3;
- updateCount();
+ update(v0, v1, v2, v3);
}
/**
@@ -158,6 +153,32 @@ public final class UnsignedInt128 implem
update(str, offset, length);
}
+ /**
+ * Constructs from the given BigInteger
+ *
+ * @param bigInt
+ * java BigInteger
+ */
+ public UnsignedInt128(BigInteger bigInt) {
+ update(bigInt);
+ }
+
+ /**
+ * Updates the value of this object from the given {@link BigInteger}.
+ * Only positive BigIntegers are expected and behavior is undefined for
+ * negative BigIntegers.
+ *
+ * @param bigInt
+ * java BigInteger
+ */
+ public void update(BigInteger bigInt) {
+ int v0 = bigInt.intValue();
+ int v1 = bigInt.shiftRight(32).intValue();
+ int v2 = bigInt.shiftRight(64).intValue();
+ int v3 = bigInt.shiftRight(96).intValue();
+ update(v0, v1, v2, v3);
+ }
+
/** @return v[0] */
public int getV0() {
return v[0];
Modified:
hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestUnsignedInt128.java
URL:
http://svn.apache.org/viewvc/hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestUnsignedInt128.java?rev=1563224&r1=1563223&r2=1563224&view=diff
==============================================================================
---
hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestUnsignedInt128.java
(original)
+++
hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestUnsignedInt128.java
Fri Jan 31 19:32:56 2014
@@ -547,4 +547,13 @@ public class TestUnsignedInt128 {
BigInteger ans = bigInteger1.divide(bigInteger2);
assertEquals(ans, complicated1.toBigIntegerSlow());
}
+
+ @Test
+ public void testBigIntConversion() {
+ BigInteger bigInteger = BigInteger.valueOf(0x1ABCDEF0123456L);
+ UnsignedInt128 uInt128 = new UnsignedInt128(bigInteger);
+ System.out.println("Out = "+uInt128.toString());
+ System.out.println("Out = "+bigInteger.toString());
+ assertEquals(bigInteger, uInt128.toBigIntegerSlow());
+ }
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java?rev=1563224&r1=1563223&r2=1563224&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
Fri Jan 31 19:32:56 2014
@@ -18,7 +18,10 @@
package org.apache.hadoop.hive.ql.exec.vector;
import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
public class DecimalColumnVector extends ColumnVector {
@@ -37,11 +40,17 @@ public class DecimalColumnVector extends
public short scale;
public short precision;
+ private final HiveDecimalWritable writableObj = new HiveDecimalWritable();
+
public DecimalColumnVector(int precision, int scale) {
- super(VectorizedRowBatch.DEFAULT_SIZE);
+ this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
+ }
+
+ public DecimalColumnVector(int size, int precision, int scale) {
+ super(size);
this.precision = (short) precision;
this.scale = (short) scale;
- final int len = VectorizedRowBatch.DEFAULT_SIZE;
+ final int len = size;
vector = new Decimal128[len];
for (int i = 0; i < len; i++) {
vector[i] = new Decimal128(0, this.scale);
@@ -50,8 +59,16 @@ public class DecimalColumnVector extends
@Override
public Writable getWritableObject(int index) {
- // TODO Auto-generated method stub
- return null;
+ if (isRepeating) {
+ index = 0;
+ }
+ if (!noNulls && isNull[index]) {
+ return NullWritable.get();
+ } else {
+ Decimal128 dec = vector[index];
+ writableObj.set(HiveDecimal.create(dec.toBigDecimal()));
+ return writableObj;
+ }
}
@Override
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1563224&r1=1563223&r2=1563224&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
Fri Jan 31 19:32:56 2014
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.io.orc
import java.io.EOFException;
import java.io.IOException;
+import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.sql.Date;
import java.sql.Timestamp;
@@ -34,11 +35,7 @@ import org.apache.hadoop.fs.FSDataInputS
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -1043,6 +1040,7 @@ class RecordReaderImpl implements Record
private static class DecimalTreeReader extends TreeReader{
private InStream valueStream;
private IntegerReader scaleStream = null;
+ private LongColumnVector scratchScaleVector = new
LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
private final int precision;
private final int scale;
@@ -1093,8 +1091,50 @@ class RecordReaderImpl implements Record
@Override
Object nextVector(Object previousVector, long batchSize) throws
IOException {
- throw new UnsupportedOperationException(
- "NextVector is not supported operation for Decimal type");
+ DecimalColumnVector result = null;
+ if (previousVector == null) {
+ result = new DecimalColumnVector(precision, scale);
+ } else {
+ result = (DecimalColumnVector) previousVector;
+ }
+
+ // Save the reference for isNull in the scratch vector
+ boolean [] scratchIsNull = scratchScaleVector.isNull;
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ if (result.isRepeating) {
+ if (!result.isNull[0]) {
+ BigInteger bInt = SerializationUtils.readBigInteger(valueStream);
+ short scaleInData = (short) scaleStream.next();
+ result.vector[0].update(bInt, scaleInData);
+
+ // Change the scale to match the schema if the scale in data is
different.
+ if (scale != scaleInData) {
+ result.vector[0].changeScaleDestructive((short) scale);
+ }
+ }
+ } else {
+ // result vector has isNull values set, use the same to read scale
vector.
+ scratchScaleVector.isNull = result.isNull;
+ scaleStream.nextVector(scratchScaleVector, batchSize);
+ for (int i = 0; i < batchSize; i++) {
+ if (!result.isNull[i]) {
+ BigInteger bInt = SerializationUtils.readBigInteger(valueStream);
+ result.vector[i].update(bInt, (short)
scratchScaleVector.vector[i]);
+
+ // Change the scale to match the schema if the scale in data is
different.
+ if (scale != scratchScaleVector.vector[i]) {
+ result.vector[i].changeScaleDestructive((short) scale);
+ }
+ }
+ }
+ }
+ // Switch back the null vector.
+ scratchScaleVector.isNull = scratchIsNull;
+ return result;
}
@Override
Modified:
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java?rev=1563224&r1=1563223&r2=1563224&view=diff
==============================================================================
---
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
(original)
+++
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
Fri Jan 31 19:32:56 2014
@@ -22,8 +22,11 @@ import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.io.BooleanWritable;
@@ -71,9 +74,10 @@ public class TestVectorizedORCReader {
private final String k;
private final Timestamp t;
private final Date dt;
+ private final HiveDecimal hd;
MyRecord(Boolean bo, Byte by, Integer i, Long l, Short s, Double d, String
k, Timestamp t,
- Date dt) {
+ Date dt, HiveDecimal hd) {
this.bo = bo;
this.by = by;
this.i = i;
@@ -83,6 +87,7 @@ public class TestVectorizedORCReader {
this.k = k;
this.t = t;
this.dt = dt;
+ this.hd = hd;
}
}
@@ -109,13 +114,15 @@ public class TestVectorizedORCReader {
"Heaven,", "we", "were", "all", "going", "direct", "the", "other",
"way"};
String[] dates = new String[] {"1991-02-28", "1970-01-31", "1950-04-23"};
+ String[] decimalStrings = new String[] {"234.443", "10001000",
"0.3333367", "67788798.0", "-234.443",
+ "-10001000", "-0.3333367", "-67788798.0", "0"};
for (int i = 0; i < 21000; ++i) {
if ((i % 7) != 0) {
writer.addRow(new MyRecord(((i % 3) == 0), (byte)(i % 5), i, (long)
200, (short) (300 + i), (double) (400 + i),
words[r1.nextInt(words.length)], new
Timestamp(Calendar.getInstance().getTime().getTime()),
- Date.valueOf(dates[i % 3])));
+ Date.valueOf(dates[i % 3]), HiveDecimal.create(decimalStrings[i %
decimalStrings.length])));
} else {
- writer.addRow(new MyRecord(null, null, i, (long) 200, null, null,
null, null, null));
+ writer.addRow(new MyRecord(null, null, i, (long) 200, null, null,
null, null, null, null));
}
}
writer.close();
@@ -162,6 +169,13 @@ public class TestVectorizedORCReader {
Assert.assertEquals(adt.getTime(), DateWritable.daysToMillis((int)
((LongWritable) b).get()));
continue;
}
+
+ // Decimals are stored as BigInteger, so convert and compare
+ if (a instanceof HiveDecimal) {
+ HiveDecimalWritable dec = (HiveDecimalWritable) b;
+ Assert.assertEquals(a, dec.getHiveDecimal());
+ }
+
if (null == a) {
Assert.assertEquals(true, (b == null || (b instanceof
NullWritable)));
} else {
@@ -180,6 +194,7 @@ public class TestVectorizedORCReader {
Assert.assertEquals(false, batch.cols[6].isRepeating);
Assert.assertEquals(false, batch.cols[7].isRepeating);
Assert.assertEquals(false, batch.cols[8].isRepeating);
+ Assert.assertEquals(false, batch.cols[9].isRepeating);
// Check non null
Assert.assertEquals(false, batch.cols[0].noNulls);
@@ -191,6 +206,7 @@ public class TestVectorizedORCReader {
Assert.assertEquals(false, batch.cols[6].noNulls);
Assert.assertEquals(false, batch.cols[7].noNulls);
Assert.assertEquals(false, batch.cols[8].noNulls);
+ Assert.assertEquals(false, batch.cols[9].noNulls);
}
Assert.assertEquals(false, rr.hasNext());
}