[
https://issues.apache.org/jira/browse/DRILL-4184?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16465534#comment-16465534
]
ASF GitHub Bot commented on DRILL-4184:
---------------------------------------
vvysotskyi closed pull request #372: DRILL-4184: support variable length
decimal fields in parquet
URL: https://github.com/apache/drill/pull/372
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java
index b18a81c606..bcfc812f0b 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java
@@ -20,10 +20,14 @@
import io.netty.buffer.DrillBuf;
import java.io.IOException;
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.exec.vector.ValueVector;
-
+import org.apache.drill.exec.vector.VariableWidthVector;
+import org.apache.drill.exec.util.DecimalUtility;
+import org.apache.drill.exec.vector.FixedWidthVector;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.format.SchemaElement;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
@@ -69,11 +73,16 @@ protected boolean readAndStoreValueSizeInformation() throws
IOException {
if ( currDefLevel == -1 ) {
currDefLevel = pageReader.definitionLevels.readInteger();
}
- if ( columnDescriptor.getMaxDefinitionLevel() > currDefLevel) {
+
+ if (columnDescriptor.getMaxDefinitionLevel() > currDefLevel) {
nullsRead++;
- // set length of zero, each index in the vector defaults to null so no
need to set the nullability
- variableWidthVector.getMutator().setValueLengthSafe(
- valuesReadInCurrentPass + pageReader.valuesReadyToRead, 0);
+ // set length of zero, each index in the vector defaults to null so no
+ // need to set the nullability
+ if (variableWidthVector == null) {
+ addDecimalLength(null); // store null length in BYTES for null value
+ } else {
+
variableWidthVector.getMutator().setValueLengthSafe(valuesReadInCurrentPass +
pageReader.valuesReadyToRead, 0);
+ }
currentValNull = true;
return false;// field is null, no length to add to data vector
}
@@ -83,18 +92,26 @@ protected boolean readAndStoreValueSizeInformation() throws
IOException {
currLengthDeterminingDictVal =
pageReader.dictionaryLengthDeterminingReader.readBytes();
}
currDictValToWrite = currLengthDeterminingDictVal;
- // re-purposing this field here for length in BYTES to prevent
repetitive multiplication/division
+
+ // re-purposing this field here for length in BYTES to prevent
+ // repetitive multiplication/division
dataTypeLengthInBits = currLengthDeterminingDictVal.length();
}
else {
// re-purposing this field here for length in BYTES to prevent
repetitive multiplication/division
dataTypeLengthInBits = pageReader.pageData.getInt((int)
pageReader.readyToReadPosInBytes);
}
- // I think this also needs to happen if it is null for the random access
- boolean success = setSafe(valuesReadInCurrentPass +
pageReader.valuesReadyToRead, pageReader.pageData,
- (int) pageReader.readyToReadPosInBytes + 4, dataTypeLengthInBits);
- if ( ! success ) {
- return true;
+
+ if (variableWidthVector == null) {
+ addDecimalLength(dataTypeLengthInBits); // store decimal length variable
length decimal field
+ }
+ else {
+ // I think this also needs to happen if it is null for the random access
+ boolean success = setSafe(valuesReadInCurrentPass +
pageReader.valuesReadyToRead, pageReader.pageData,
+ (int) pageReader.readyToReadPosInBytes + 4,
dataTypeLengthInBits);
+ if ( ! success ) {
+ return true;
+ }
}
return false;
}
@@ -122,19 +139,34 @@ public void updatePosition() {
protected void readField(long recordsToRead) {
// TODO - unlike most implementations of this method, the
recordsReadInThisIteration field is not set here
// should verify that this is not breaking anything
- currentValNull =
variableWidthVector.getAccessor().getObject(valuesReadInCurrentPass) == null;
+ if (variableWidthVector == null) {
+ currentValNull = getDecimalLength(valuesReadInCurrentPass) == null;
+ }
+ else {
+ currentValNull =
variableWidthVector.getAccessor().getObject(valuesReadInCurrentPass) == null;
+ }
+
// again, I am re-purposing the unused field here, it is a length n BYTES,
not bits
- if (! currentValNull) {
+ if (!currentValNull) {
+ boolean conventionalSetSafe = true;
if (usingDictionary) {
currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
+
+ if (variableWidthVector == null) {
+ ByteBuffer bf = currDictValToWrite.toByteBuffer();
+ BigDecimal bd = DecimalUtility.getBigDecimalFromByteBuffer(bf,
schemaElement.getScale());
+ boolean success = setSafe(valuesReadInCurrentPass, bd);
+ assert success;
+ conventionalSetSafe = false;
+ }
}
- // re-purposing this field here for length in BYTES to prevent
repetitive multiplication/division
- dataTypeLengthInBits =
variableWidthVector.getAccessor().getValueLength(valuesReadInCurrentPass);
- boolean success = setSafe(valuesReadInCurrentPass, pageReader.pageData,
+ if (conventionalSetSafe) {
+ setDataTypeLength();
+ boolean success = setSafe(valuesReadInCurrentPass, pageReader.pageData,
(int) pageReader.readPosInBytes + 4, dataTypeLengthInBits);
- assert success;
+ assert success;
+ }
}
updatePosition();
}
-
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthColumnReaders.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthColumnReaders.java
index 39cad0ee36..1d445ff116 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthColumnReaders.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthColumnReaders.java
@@ -32,6 +32,7 @@
import org.apache.drill.exec.vector.NullableDecimal38SparseVector;
import org.apache.drill.exec.vector.NullableVarBinaryVector;
import org.apache.drill.exec.vector.NullableVarCharVector;
+import org.apache.drill.exec.vector.NullableVectorDefinitionSetter;
import org.apache.drill.exec.vector.VarBinaryVector;
import org.apache.drill.exec.vector.VarCharVector;
@@ -55,14 +56,20 @@
@Override
public boolean setSafe(int index, DrillBuf bytebuf, int start, int length)
{
- int width = Decimal28SparseHolder.WIDTH;
BigDecimal intermediate =
DecimalUtility.getBigDecimalFromDrillBuf(bytebuf, start, length,
schemaElement.getScale());
+ return setSafe(index, intermediate);
+ }
+
+ @Override
+ public boolean setSafe(int index, BigDecimal intermediate) {
+ int width = Decimal28SparseHolder.WIDTH;
if (index >= decimal28Vector.getValueCapacity()) {
return false;
}
DecimalUtility.getSparseFromBigDecimal(intermediate,
decimal28Vector.getBuffer(), index * width, schemaElement.getScale(),
- schemaElement.getPrecision(),
Decimal28SparseHolder.nDecimalDigits);
+ schemaElement.getPrecision(),
Decimal28SparseHolder.nDecimalDigits);
+ //((NullableVectorDefinitionSetter)
decimal28Vector.getMutator()).setIndexDefined(index);
return true;
}
@@ -85,14 +92,19 @@ public int capacity() {
@Override
public boolean setSafe(int index, DrillBuf bytebuf, int start, int length)
{
- int width = Decimal28SparseHolder.WIDTH;
BigDecimal intermediate =
DecimalUtility.getBigDecimalFromDrillBuf(bytebuf, start, length,
schemaElement.getScale());
+ return setSafe(index, intermediate);
+ }
+
+ @Override
+ public boolean setSafe(int index, BigDecimal intermediate) {
+ int width = Decimal28SparseHolder.WIDTH;
if (index >= nullableDecimal28Vector.getValueCapacity()) {
return false;
}
DecimalUtility.getSparseFromBigDecimal(intermediate,
nullableDecimal28Vector.getBuffer(), index * width, schemaElement.getScale(),
- schemaElement.getPrecision(),
Decimal28SparseHolder.nDecimalDigits);
+ schemaElement.getPrecision(),
Decimal28SparseHolder.nDecimalDigits);
nullableDecimal28Vector.getMutator().setIndexDefined(index);
return true;
}
@@ -116,14 +128,19 @@ public int capacity() {
@Override
public boolean setSafe(int index, DrillBuf bytebuf, int start, int length)
{
- int width = Decimal38SparseHolder.WIDTH;
BigDecimal intermediate =
DecimalUtility.getBigDecimalFromDrillBuf(bytebuf, start, length,
schemaElement.getScale());
+ return setSafe(index, intermediate);
+ }
+
+ @Override
+ public boolean setSafe(int index, BigDecimal intermediate) {
+ int width = Decimal38SparseHolder.WIDTH;
if (index >= decimal28Vector.getValueCapacity()) {
return false;
}
DecimalUtility.getSparseFromBigDecimal(intermediate,
decimal28Vector.getBuffer(), index * width, schemaElement.getScale(),
- schemaElement.getPrecision(),
Decimal38SparseHolder.nDecimalDigits);
+ schemaElement.getPrecision(),
Decimal38SparseHolder.nDecimalDigits);
return true;
}
@@ -146,15 +163,19 @@ public int capacity() {
@Override
public boolean setSafe(int index, DrillBuf bytebuf, int start, int length)
{
- int width = Decimal38SparseHolder.WIDTH;
BigDecimal intermediate =
DecimalUtility.getBigDecimalFromDrillBuf(bytebuf, start, length,
schemaElement.getScale());
+ return setSafe(index, intermediate);
+ }
+
+ @Override
+ public boolean setSafe(int index, BigDecimal intermediate) {
+ int width = Decimal38SparseHolder.WIDTH;
if (index >= nullableDecimal38Vector.getValueCapacity()) {
return false;
}
-
DecimalUtility.getSparseFromBigDecimal(intermediate,
nullableDecimal38Vector.getBuffer(), index * width, schemaElement.getScale(),
- schemaElement.getPrecision(),
Decimal38SparseHolder.nDecimalDigits);
+ schemaElement.getPrecision(),
Decimal38SparseHolder.nDecimalDigits);
nullableDecimal38Vector.getMutator().setIndexDefined(index);
return true;
}
@@ -195,6 +216,11 @@ public boolean setSafe(int index, DrillBuf bytebuf, int
start, int length) {
return true;
}
+ @Override
+ public boolean setSafe(int index, BigDecimal intermediate) {
+ throw new UnsupportedOperationException("Internal error: VarCharColumn
does not support decimal values");
+ }
+
@Override
public int capacity() {
return varCharVector.getBuffer().capacity();
@@ -232,6 +258,12 @@ public boolean setSafe(int index, DrillBuf value, int
start, int length) {
return true;
}
+ @Override
+ public boolean setSafe(int index, BigDecimal intermediate) {
+ throw new UnsupportedOperationException("Internal error:
NullableVarCharColumn does not support decimal values");
+ }
+
+
@Override
public int capacity() {
return vector.getBuffer().capacity();
@@ -268,6 +300,10 @@ public boolean setSafe(int index, DrillBuf value, int
start, int length) {
return true;
}
+ public boolean setSafe(int index, BigDecimal intermediate) {
+ throw new UnsupportedOperationException("Internal error:
VarBinaryColumn does not support decimal values");
+ }
+
@Override
public int capacity() {
return varBinaryVector.getBuffer().capacity();
@@ -306,6 +342,10 @@ public boolean setSafe(int index, DrillBuf value, int
start, int length) {
return true;
}
+ public boolean setSafe(int index, BigDecimal intermediate) {
+ throw new UnsupportedOperationException("Internal error:
NullableVarBinaryColumn does not support decimal values");
+ }
+
@Override
public int capacity() {
return nullableVarBinaryVector.getBuffer().capacity();
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthValuesColumn.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthValuesColumn.java
index 6a86cea424..0f21a3987f 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthValuesColumn.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthValuesColumn.java
@@ -20,8 +20,15 @@
import io.netty.buffer.DrillBuf;
import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.ArrayList;
import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.exec.vector.ValueVector.Mutator;
+import org.apache.drill.exec.vector.Decimal28SparseVector;
+import org.apache.drill.exec.vector.Decimal28SparseVector.Accessor;
+import org.apache.drill.exec.vector.Decimal38SparseVector;
+import org.apache.drill.exec.vector.FixedWidthVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.VariableWidthVector;
@@ -36,12 +43,27 @@
Binary currLengthDeterminingDictVal;
Binary currDictValToWrite;
VariableWidthVector variableWidthVector;
+ FixedWidthVector fixedWidthVector;
+
+ // decimalLengths list is part of a near-term fix for DRILL-4184.
+ // Decimal[23]8SparseVector classes are fixed width vectors, without ability
to "remember" offsets of
+ // (variable width) field sizes. so, we "remember" the array sizes in
decimalLengths (also used to
+ // "remember" whether a value was null, for nullable decimal columns).
+ // TODO: storage of decimal values should support variable length values in
a much cleaner way than this,
+ // perhaps with a new variable width Decimal vector class.
+ protected ArrayList<Integer> decimalLengths = new ArrayList();
VarLengthValuesColumn(ParquetRecordReader parentReader, int allocateSize,
ColumnDescriptor descriptor,
ColumnChunkMetaData columnChunkMetaData, boolean
fixedLength, V v,
SchemaElement schemaElement) throws
ExecutionSetupException {
super(parentReader, allocateSize, descriptor, columnChunkMetaData,
fixedLength, v, schemaElement);
- variableWidthVector = (VariableWidthVector) valueVec;
+ if (valueVec instanceof VariableWidthVector) {
+ variableWidthVector = (VariableWidthVector) valueVec;
+ }
+ else {
+ fixedWidthVector = (FixedWidthVector) valueVec;
+ dataTypeLengthInBits = this.schemaElement.getPrecision() * 8;
+ }
if
(columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
usingDictionary = true;
}
@@ -50,11 +72,51 @@
}
}
+ @Override
+ public void reset() {
+ super.reset();
+ decimalLengths.clear();
+ }
+
+ /**
+ * Use this method to add a length (may be null) to the list of decimal
lengths.
+ * @param val decimal length, or null if the value is NULL
+ */
+ protected void addDecimalLength(Integer val) {
+ decimalLengths.add(val);
+ }
+
+ /**
+ * Use this method to obtain the decimal length from the list of stored
lengths.
+ * @param subscript subscript to obtain
+ * @return Integer object indicating length, or null if the value is NULL
+ */
+ protected Integer getDecimalLength(int subscript) {
+ /* Including this code (now commented out) throws an exception,
demonstrating
+ * that the list is definitely not always accessed at the end.
+ if (subscript != decimalLengths.size() - 1) {
+ throw new UnsupportedOperationException("Accessing decimalLengths
subscript " + subscript + " with size " + decimalLengths.size());
+ }
+ */
+ return decimalLengths.get(subscript);
+ }
+
public abstract boolean setSafe(int index, DrillBuf bytes, int start, int
length);
+ public abstract boolean setSafe(int index, BigDecimal intermediate);
+
+ protected void setDataTypeLength() {
+ if (variableWidthVector == null) {
+ dataTypeLengthInBits = getDecimalLength(valuesReadInCurrentPass);
+ } else {
+ dataTypeLengthInBits =
variableWidthVector.getAccessor().getValueLength(valuesReadInCurrentPass);
+ }
+ }
+
@Override
protected void readField(long recordToRead) {
- dataTypeLengthInBits =
variableWidthVector.getAccessor().getValueLength(valuesReadInCurrentPass);
+ setDataTypeLength();
+
// again, I am re-purposing the unused field here, it is a length n BYTES,
not bits
boolean success = setSafe((int) valuesReadInCurrentPass,
pageReader.pageData,
(int) pageReader.readPosInBytes + 4, dataTypeLengthInBits);
@@ -95,10 +157,14 @@ protected boolean readAndStoreValueSizeInformation()
throws IOException {
// re-purposing this field here for length in BYTES to prevent
repetitive multiplication/division
dataTypeLengthInBits = pageReader.pageData.getInt((int)
pageReader.readyToReadPosInBytes);
}
-
- // this should not fail
- variableWidthVector.getMutator().setValueLengthSafe((int)
valuesReadInCurrentPass + pageReader.valuesReadyToRead,
+ if (variableWidthVector == null) {
+ addDecimalLength(dataTypeLengthInBits); // store length in BYTES for
variable length decimal field
+ }
+ else {
+ // this should not fail
+ variableWidthVector.getMutator().setValueLengthSafe((int)
valuesReadInCurrentPass + pageReader.valuesReadyToRead,
dataTypeLengthInBits);
+ }
return false;
}
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestVarlenDecimal.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestVarlenDecimal.java
new file mode 100755
index 0000000000..13b76d2092
--- /dev/null
+++
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestVarlenDecimal.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet;
+
+import org.apache.drill.BaseTestQuery;
+import org.apache.drill.exec.planner.physical.PlannerSettings;
+import org.apache.drill.exec.proto.UserBitShared;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestVarlenDecimal extends BaseTestQuery {
+ // enable decimal data type
+ @BeforeClass
+ public static void enableDecimalDataType() throws Exception {
+ test(String.format("alter session set `%s` = true",
PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY));
+ }
+
+ private static final String DATAFILE = "cp.`parquet/varlenDecimal.parquet`";
+
+ @Test
+ public void testNullCount() throws Exception {
+ String query = String.format("select count(*) as c from %s where
department_id is null", DATAFILE);
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("c")
+ .baselineValues(1L)
+ .build()
+ .run();
+ }
+
+ @Test
+ public void testNotNullCount() throws Exception {
+ String query = String.format("select count(*) as c from %s where
department_id is not null", DATAFILE);
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("c")
+ .baselineValues(106L)
+ .build()
+ .run();
+ }
+
+ @Test
+ public void testSimpleQuery() throws Exception {
+ String query = String.format("select cast(department_id as bigint) as c
from %s where cast(employee_id as decimal) = 170", DATAFILE);
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("c")
+ .baselineValues(80L)
+ .build()
+ .run();
+ }
+}
diff --git a/exec/java-exec/src/test/resources/parquet/varlenDecimal.parquet
b/exec/java-exec/src/test/resources/parquet/varlenDecimal.parquet
new file mode 100755
index 0000000000..c531ef13eb
Binary files /dev/null and
b/exec/java-exec/src/test/resources/parquet/varlenDecimal.parquet differ
diff --git
a/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java
b/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java
index a87fe4c8cc..243982fd6b 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java
@@ -152,12 +152,25 @@ public static BigDecimal
getBigDecimalFromIntermediate(ByteBuf data, int startIn
return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits,
scale, false);
}
+ public static BigDecimal getBigDecimalFromSparse(ByteBuf data, int
startIndex, int nDecimalDigits, int scale) {
+
+ // In the sparse representation we pad the scale with zeroes for ease
of arithmetic, need to truncate
+ return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits,
scale, true);
+ }
+
public static BigDecimal getBigDecimalFromSparse(DrillBuf data, int
startIndex, int nDecimalDigits, int scale) {
// In the sparse representation we pad the scale with zeroes for ease
of arithmetic, need to truncate
return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits,
scale, true);
}
+ public static BigDecimal getBigDecimalFromDrillBuf(ByteBuf bytebuf, int
start, int length, int scale) {
+ byte[] value = new byte[length];
+ bytebuf.getBytes(start, value, 0, length);
+ BigInteger unscaledValue = new BigInteger(value);
+ return new BigDecimal(unscaledValue, scale);
+ }
+
public static BigDecimal getBigDecimalFromDrillBuf(DrillBuf bytebuf, int
start, int length, int scale) {
byte[] value = new byte[length];
bytebuf.getBytes(start, value, 0, length);
@@ -165,9 +178,19 @@ public static BigDecimal
getBigDecimalFromDrillBuf(DrillBuf bytebuf, int start,
return new BigDecimal(unscaledValue, scale);
}
- public static BigDecimal getBigDecimalFromByteBuffer(ByteBuffer bytebuf, int
start, int length, int scale) {
- byte[] value = new byte[length];
- bytebuf.get(value);
+ public static BigDecimal getBigDecimalFromByteBuffer(ByteBuffer bytebuf, int
scale) {
+ byte[] value;
+ if (bytebuf.hasArray()) {
+ value = bytebuf.array();
+ }
+ else {
+ // rewinding bytebuf gives extra bytes here, and an incorrect decimal.
+ // instead, we save position, get bytes through the end, and restore the
position.
+ int savePosition = bytebuf.position();
+ value = new byte[bytebuf.remaining()];
+ bytebuf.get(value);
+ bytebuf.position(savePosition); // restore bytebuf position, which may
not be necessary
+ }
BigInteger unscaledValue = new BigInteger(value);
return new BigDecimal(unscaledValue, scale);
}
@@ -288,7 +311,7 @@ public static BigDecimal getBigDecimalFromDense(DrillBuf
data, int startIndex, i
intermediate.release();
}
- }
+ }
/*
* Function converts the BigDecimal and stores it in out internal sparse
representation
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> Drill does not support Parquet DECIMAL values in variable length BINARY fields
> ------------------------------------------------------------------------------
>
> Key: DRILL-4184
> URL: https://issues.apache.org/jira/browse/DRILL-4184
> Project: Apache Drill
> Issue Type: Bug
> Components: Storage - Parquet
> Affects Versions: 1.4.0
> Environment: Windows 7 Professional, Java 1.8.0_66
> Reporter: Dave Oshinsky
> Priority: Major
>
> Encoding a DECIMAL logical type in Parquet using the variable length BINARY
> primitive type is not supported by Drill as of versions 1.3.0 and 1.4.0. The
> problem first surfaces with the ClassCastException shown below, but fixing
> the immediate cause of the exception is not sufficient to support this
> combination (DECIMAL, BINARY) in a Parquet file.
> In Drill, DECIMAL is currently assumed to be INT32, INT64, INT96, or
> FIXED_LEN_BYTE_ARRAY. Are there any plans to support DECIMAL with variable
> length BINARY? Avro definitely supports encoding DECIMAL in variable length
> bytes (see https://avro.apache.org/docs/current/spec.html#Decimal), but this
> support in Parquet is less clear.
> Selecting on a BINARY DECIMAL field in a parquet file throws an exception as
> shown below (java.lang.ClassCastException:
> org.apache.drill.exec.vector.Decimal28SparseVector cannot be cast to
> org.apache.drill.exec.vector.VariableWidthVector). The successful query at
> bottom selected on a string field in the same file.
> 0: jdbc:drill:zk=local> select count(*) from
> dfs.`c:/dao/DBArchivePredictor/tenrows.parquet` where acct_no=70000020;
> org.apache.drill.common.exceptions.DrillRuntimeException: Error in parquet
> record reader.
> Message: Failure in setting up reader
> Parquet Metadata: ParquetMetaData{FileMetaData{schema: message sbi.acct_mstr {
> required binary ACCT_NO (DECIMAL(20,0));
> optional binary SF_NO (UTF8);
> optional binary LF_NO (UTF8);
> optional binary BRANCH_NO (DECIMAL(20,0));
> optional binary INTRO_CUST_NO (DECIMAL(20,0));
> optional binary INTRO_ACCT_NO (DECIMAL(20,0));
> optional binary INTRO_SIGN (UTF8);
> optional binary TYPE (UTF8);
> optional binary OPR_MODE (UTF8);
> optional binary CUR_ACCT_TYPE (UTF8);
> optional binary TITLE (UTF8);
> optional binary CORP_CUST_NO (DECIMAL(20,0));
> optional binary APLNDT (UTF8);
> optional binary OPNDT (UTF8);
> optional binary VERI_EMP_NO (DECIMAL(20,0));
> optional binary VERI_SIGN (UTF8);
> optional binary MANAGER_SIGN (UTF8);
> optional binary CURBAL (DECIMAL(8,2));
> optional binary STATUS (UTF8);
> }
> , metadata:
> {parquet.avro.schema={"type":"record","name":"acct_mstr","namespace"
> :"sbi","fields":[{"name":"ACCT_NO","type":{"type":"bytes","logicalType":"decimal
> ","precision":20,"scale":0,"cv_auto_incr":false,"cv_case_sensitive":false,"cv_co
> lumn_class":"java.math.BigDecimal","cv_connection":"oracle.jdbc.driver.T4CConnec
> tion","cv_currency":true,"cv_def_writable":false,"cv_nullable":0,"cv_precision":
> 20,"cv_read_only":false,"cv_scale":0,"cv_searchable":true,"cv_signed":true,"cv_s
> ubscript":1,"cv_type":2,"cv_typename":"NUMBER","cv_writable":true}},{"name":"SF_
> NO","type":["null",{"type":"string","cv_auto_incr":false,"cv_case_sensitive":tru
> e,"cv_column_class":"java.lang.String","cv_currency":false,"cv_def_writable":fal
> se,"cv_nullable":1,"cv_precision":10,"cv_read_only":false,"cv_scale":0,"cv_searc
> hable":true,"cv_signed":true,"cv_subscript":2,"cv_type":12,"cv_typename":"VARCHA
> R2","cv_writable":true}]},{"name":"LF_NO","type":["null",{"type":"string","cv_au
> to_incr":false,"cv_case_sensitive":true,"cv_column_class":"java.lang.String","cv
> _currency":false,"cv_def_writable":false,"cv_nullable":1,"cv_precision":10,"cv_r
> ead_only":false,"cv_scale":0,"cv_searchable":true,"cv_signed":true,"cv_subscript
> ":3,"cv_type":12,"cv_typename":"VARCHAR2","cv_writable":true}]},{"name":"BRANCH_
> NO","type":["null",{"type":"bytes","logicalType":"decimal","precision":20,"scale
> ":0,"cv_auto_incr":false,"cv_case_sensitive":false,"cv_column_class":"java.math.
> BigDecimal","cv_currency":true,"cv_def_writable":false,"cv_nullable":1,"cv_preci
> sion":20,"cv_read_only":false,"cv_scale":0,"cv_searchable":true,"cv_signed":true
> ,"cv_subscript":4,"cv_type":2,"cv_typename":"NUMBER","cv_writable":true}]},{"nam
> e":"INTRO_CUST_NO","type":["null",{"type":"bytes","logicalType":"decimal","preci
> sion":20,"scale":0,"cv_auto_incr":false,"cv_case_sensitive":false,"cv_column_cla
> ss":"java.math.BigDecimal","cv_currency":true,"cv_def_writable":false,"cv_nullab
> le":1,"cv_precision":20,"cv_read_only":false,"cv_scale":0,"cv_searchable":true,"
> cv_signed":true,"cv_subscript":5,"cv_type":2,"cv_typename":"NUMBER","cv_writable
> ":true}]},{"name":"INTRO_ACCT_NO","type":["null",{"type":"bytes","logicalType":"
> decimal","precision":20,"scale":0,"cv_auto_incr":false,"cv_case_sensitive":false
> ,"cv_column_class":"java.math.BigDecimal","cv_currency":true,"cv_def_writable":f
> alse,"cv_nullable":1,"cv_precision":20,"cv_read_only":false,"cv_scale":0,"cv_sea
> rchable":true,"cv_signed":true,"cv_subscript":6,"cv_type":2,"cv_typename":"NUMBE
> R","cv_writable":true}]},{"name":"INTRO_SIGN","type":["null",{"type":"string","c
> v_auto_incr":false,"cv_case_sensitive":true,"cv_column_class":"java.lang.String"
> ,"cv_currency":false,"cv_def_writable":false,"cv_nullable":1,"cv_precision":1,"c
> v_read_only":false,"cv_scale":0,"cv_searchable":true,"cv_signed":true,"cv_subscr
> ipt":7,"cv_type":12,"cv_typename":"VARCHAR2","cv_writable":true}]},{"name":"TYPE
> ","type":["null",{"type":"string","cv_auto_incr":false,"cv_case_sensitive":true,
> "cv_column_class":"java.lang.String","cv_currency":false,"cv_def_writable":false
> ,"cv_nullable":1,"cv_precision":2,"cv_read_only":false,"cv_scale":0,"cv_searchab
> le":true,"cv_signed":true,"cv_subscript":8,"cv_type":12,"cv_typename":"VARCHAR2"
> ,"cv_writable":true}]},{"name":"OPR_MODE","type":["null",{"type":"string","cv_au
> to_incr":false,"cv_case_sensitive":true,"cv_column_class":"java.lang.String","cv
> _currency":false,"cv_def_writable":false,"cv_nullable":1,"cv_precision":2,"cv_re
> ad_only":false,"cv_scale":0,"cv_searchable":true,"cv_signed":true,"cv_subscript"
> :9,"cv_type":12,"cv_typename":"VARCHAR2","cv_writable":true}]},{"name":"CUR_ACCT
> _TYPE","type":["null",{"type":"string","cv_auto_incr":false,"cv_case_sensitive":
> true,"cv_column_class":"java.lang.String","cv_currency":false,"cv_def_writable":
> false,"cv_nullable":1,"cv_precision":4,"cv_read_only":false,"cv_scale":0,"cv_sea
> rchable":true,"cv_signed":true,"cv_subscript":10,"cv_type":12,"cv_typename":"VAR
> CHAR2","cv_writable":true}]},{"name":"TITLE","type":["null",{"type":"string","cv
> _auto_incr":false,"cv_case_sensitive":true,"cv_column_class":"java.lang.String",
> "cv_currency":false,"cv_def_writable":false,"cv_nullable":1,"cv_precision":30,"c
> v_read_only":false,"cv_scale":0,"cv_searchable":true,"cv_signed":true,"cv_subscr
> ipt":11,"cv_type":12,"cv_typename":"VARCHAR2","cv_writable":true}]},{"name":"COR
> P_CUST_NO","type":["null",{"type":"bytes","logicalType":"decimal","precision":20
> ,"scale":0,"cv_auto_incr":false,"cv_case_sensitive":false,"cv_column_class":"jav
> a.math.BigDecimal","cv_currency":true,"cv_def_writable":false,"cv_nullable":1,"c
> v_precision":20,"cv_read_only":false,"cv_scale":0,"cv_searchable":true,"cv_signe
> d":true,"cv_subscript":12,"cv_type":2,"cv_typename":"NUMBER","cv_writable":true}
> ]},{"name":"APLNDT","type":["null",{"type":"string","cv_auto_incr":false,"cv_cas
> e_sensitive":false,"cv_column_class":"java.sql.Timestamp","cv_currency":false,"c
> v_def_writable":false,"cv_nullable":1,"cv_precision":0,"cv_read_only":false,"cv_
> scale":0,"cv_searchable":true,"cv_signed":true,"cv_subscript":13,"cv_type":93,"c
> v_typename":"DATE","cv_writable":true}]},{"name":"OPNDT","type":["null",{"type":
> "string","cv_auto_incr":false,"cv_case_sensitive":false,"cv_column_class":"java.
> sql.Timestamp","cv_currency":false,"cv_def_writable":false,"cv_nullable":1,"cv_p
> recision":0,"cv_read_only":false,"cv_scale":0,"cv_searchable":true,"cv_signed":t
> rue,"cv_subscript":14,"cv_type":93,"cv_typename":"DATE","cv_writable":true}]},{"
> name":"VERI_EMP_NO","type":["null",{"type":"bytes","logicalType":"decimal","prec
> ision":20,"scale":0,"cv_auto_incr":false,"cv_case_sensitive":false,"cv_column_cl
> ass":"java.math.BigDecimal","cv_currency":true,"cv_def_writable":false,"cv_nulla
> ble":1,"cv_precision":20,"cv_read_only":false,"cv_scale":0,"cv_searchable":true,
> "cv_signed":true,"cv_subscript":15,"cv_type":2,"cv_typename":"NUMBER","cv_writab
> le":true}]},{"name":"VERI_SIGN","type":["null",{"type":"string","cv_auto_incr":f
> alse,"cv_case_sensitive":true,"cv_column_class":"java.lang.String","cv_currency"
> :false,"cv_def_writable":false,"cv_nullable":1,"cv_precision":1,"cv_read_only":f
> alse,"cv_scale":0,"cv_searchable":true,"cv_signed":true,"cv_subscript":16,"cv_ty
> pe":12,"cv_typename":"VARCHAR2","cv_writable":true}]},{"name":"MANAGER_SIGN","ty
> pe":["null",{"type":"string","cv_auto_incr":false,"cv_case_sensitive":true,"cv_c
> olumn_class":"java.lang.String","cv_currency":false,"cv_def_writable":false,"cv_
> nullable":1,"cv_precision":1,"cv_read_only":false,"cv_scale":0,"cv_searchable":t
> rue,"cv_signed":true,"cv_subscript":17,"cv_type":12,"cv_typename":"VARCHAR2","cv
> _writable":true}]},{"name":"CURBAL","type":["null",{"type":"bytes","logicalType"
> :"decimal","precision":8,"scale":2,"cv_auto_incr":false,"cv_case_sensitive":fals
> e,"cv_column_class":"java.math.BigDecimal","cv_currency":true,"cv_def_writable":
> false,"cv_nullable":1,"cv_precision":8,"cv_read_only":false,"cv_scale":2,"cv_sea
> rchable":true,"cv_signed":true,"cv_subscript":18,"cv_type":2,"cv_typename":"NUMB
> ER","cv_writable":true}]},{"name":"STATUS","type":["null",{"type":"string","cv_a
> uto_incr":false,"cv_case_sensitive":true,"cv_column_class":"java.lang.String","c
> v_currency":false,"cv_def_writable":false,"cv_nullable":1,"cv_precision":1,"cv_r
> ead_only":false,"cv_scale":0,"cv_searchable":true,"cv_signed":true,"cv_subscript
> ":19,"cv_type":12,"cv_typename":"VARCHAR2","cv_writable":true}]}]}}}, blocks:
> [B
> lockMetaData{10, 1281 [ColumnMetaData{SNAPPY [ACCT_NO] BINARY [BIT_PACKED,
> PLAI
> N], 4}, ColumnMetaData{SNAPPY [SF_NO] BINARY [RLE, BIT_PACKED,
> PLAIN_DICTIONARY
> ], 88}, ColumnMetaData{SNAPPY [LF_NO] BINARY [RLE, BIT_PACKED,
> PLAIN_DICTIONARY
> ], 163}, ColumnMetaData{SNAPPY [BRANCH_NO] BINARY [RLE, BIT_PACKED,
> PLAIN_DICTI
> ONARY], 241}, ColumnMetaData{SNAPPY [INTRO_CUST_NO] BINARY [RLE, BIT_PACKED,
> PL
> AIN_DICTIONARY], 298}, ColumnMetaData{SNAPPY [INTRO_ACCT_NO] BINARY [RLE,
> BIT_P
> ACKED, PLAIN_DICTIONARY], 364}, ColumnMetaData{SNAPPY [INTRO_SIGN] BINARY
> [RLE,
> BIT_PACKED, PLAIN_DICTIONARY], 421}, ColumnMetaData{SNAPPY [TYPE] BINARY
> [RLE,
> BIT_PACKED, PLAIN_DICTIONARY], 478}, ColumnMetaData{SNAPPY [OPR_MODE] BINARY
> [
> RLE, BIT_PACKED, PLAIN_DICTIONARY], 538}, ColumnMetaData{SNAPPY
> [CUR_ACCT_TYPE]
> BINARY [RLE, BIT_PACKED, PLAIN_DICTIONARY], 598}, ColumnMetaData{SNAPPY
> [TITLE]
> BINARY [RLE, BIT_PACKED, PLAIN_DICTIONARY], 658}, ColumnMetaData{SNAPPY
> [CORP_
> CUST_NO] BINARY [RLE, BIT_PACKED, PLAIN_DICTIONARY], 736},
> ColumnMetaData{SNAPP
> Y [APLNDT] BINARY [RLE, BIT_PACKED, PLAIN_DICTIONARY], 802},
> ColumnMetaData{SNA
> PPY [OPNDT] BINARY [RLE, BIT_PACKED, PLAIN_DICTIONARY], 919},
> ColumnMetaData{SN
> APPY [VERI_EMP_NO] BINARY [RLE, BIT_PACKED, PLAIN_DICTIONARY], 1036},
> ColumnMet
> aData{SNAPPY [VERI_SIGN] BINARY [RLE, BIT_PACKED, PLAIN_DICTIONARY], 1093},
> Col
> umnMetaData{SNAPPY [MANAGER_SIGN] BINARY [RLE, BIT_PACKED,
> PLAIN_DICTIONARY], 1
> 150}, ColumnMetaData{SNAPPY [CURBAL] BINARY [RLE, BIT_PACKED,
> PLAIN_DICTIONARY]
> , 1207}, ColumnMetaData{SNAPPY [STATUS] BINARY [RLE, BIT_PACKED,
> PLAIN_DICTIONA
> RY], 1270}]}]}
> at
> org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader
> .handleAndRaise(ParquetRecordReader.java:346)
> at
> org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader
> .setup(ParquetRecordReader.java:339)
> at
> org.apache.drill.exec.physical.impl.ScanBatch.<init>(ScanBatch.java:1
> 01)
> at
> org.apache.drill.exec.store.parquet.ParquetScanBatchCreator.getBatch(
> ParquetScanBatchCreator.java:168)
> at
> org.apache.drill.exec.store.parquet.ParquetScanBatchCreator.getBatch(
> ParquetScanBatchCreator.java:56)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getRecordBatch(ImplCr
> eator.java:151)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getChildren(ImplCreat
> or.java:174)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getRecordBatch(ImplCr
> eator.java:131)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getChildren(ImplCreat
> or.java:174)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getRecordBatch(ImplCr
> eator.java:131)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getChildren(ImplCreat
> or.java:174)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getRecordBatch(ImplCr
> eator.java:131)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getChildren(ImplCreat
> or.java:174)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getRecordBatch(ImplCr
> eator.java:131)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getChildren(ImplCreat
> or.java:174)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getRecordBatch(ImplCr
> eator.java:131)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getChildren(ImplCreat
> or.java:174)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getRootExec(ImplCreat
> or.java:105)
> at
> org.apache.drill.exec.physical.impl.ImplCreator.getExec(ImplCreator.j
> ava:79)
> at
> org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExec
> utor.java:230)
> at
> org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable
> .java:38)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.
> java:1142)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor
> .java:617)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.ClassCastException:
> org.apache.drill.exec.vector.Decimal28SparseVector cannot be cast to
> org.apache.drill.exec.vector.VariableWidthVector
> at
> org.apache.drill.exec.store.parquet.columnreaders.VarLengthValuesColu
> mn.<init>(VarLengthValuesColumn.java:44)
> at
> org.apache.drill.exec.store.parquet.columnreaders.VarLengthColumnRead
> ers$Decimal28Column.<init>(VarLengthColumnReaders.java:52)
> at
> org.apache.drill.exec.store.parquet.columnreaders.ColumnReaderFactory
> .getReader(ColumnReaderFactory.java:178)
> at
> org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader
> .setup(ParquetRecordReader.java:319)
> ... 22 more
> Error: SYSTEM ERROR: ClassCastException:
> org.apache.drill.exec.vector.Decimal28SparseVector cannot be cast to
> org.apache.drill.exec.vector.VariableWidthVector
> Fragment 0:0
> [Error Id: 22bfa8dd-1129-4300-9449-409e96d6c800 on
> DaveOshinsky-PC.gp.cv.commvault.com:31010] (state=,code=0)
> 0: jdbc:drill:zk=local> select count(*) from
> dfs.`c:/dao/DBArchivePredictor/tenrows.parquet` where opr_mode='JO';
> +---------+
> | EXPR$0 |
> +---------+
> | 10 |
> +---------+
> 1 row selected (0.406 seconds)
> 0: jdbc:drill:zk=local>
> The immediate cause of this exception is that Drill, in
> org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader,
> assumes that all BINARY values are encoded in VariableWidthVectors. For
> BINARY DECIMAL, this is not true, as for example Decimal28SparseVector is a
> FixedWidthVector, not a VariableWidthVector. The assumption that DECIMAL is
> not encoded in variable length BINARY is found in a number of other places in
> the Drill code, including:
> org.apache.drill.exec.store.parquet.columnreaders.ColumnReaderFactory only
> contains logic to handle DECIMAL with INT32, INT64, INT96, or
> FIXED_LEN_BYTE_ARRAY. BINARY is not supported with DECIMAL.
> org.apache.drill.exec.store.parquet.columnreaders.NullableFixedByteAlignedReaders
> does not support a nullable reader for BINARY in getNullableColumnReader
> method.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)