Repository: spark Updated Branches: refs/heads/master 43ef1e52b -> 729996165
[SPARK-14016][SQL] Support high-precision decimals in vectorized parquet reader ## What changes were proposed in this pull request? This patch adds support for reading `DecimalTypes` with high (> 18) precision in `VectorizedColumnReader` ## How was this patch tested? 1. `VectorizedColumnReader` initially had a gating condition on `primitiveType.getDecimalMetadata().getPrecision() > Decimal.MAX_LONG_DIGITS()` that made us fall back on parquet-mr for handling high-precision decimals. This condition is now removed. 2. In particular, the `ParquetHadoopFsRelationSuite` (that tests for all supported hive types -- including `DecimalType(25, 5)`) fails when the gating condition is removed (https://github.com/apache/spark/pull/11808) and should now pass with this change. Author: Sameer Agarwal <[email protected]> Closes #11869 from sameeragarwal/bigdecimal-parquet. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/72999616 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/72999616 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/72999616 Branch: refs/heads/master Commit: 7299961657b5591a3257b21e40f3047db27f221c Parents: 43ef1e5 Author: Sameer Agarwal <[email protected]> Authored: Mon Mar 21 18:19:54 2016 -0700 Committer: Yin Huai <[email protected]> Committed: Mon Mar 21 18:19:54 2016 -0700 ---------------------------------------------------------------------- .../datasources/parquet/VectorizedColumnReader.java | 13 +++++++++++++ .../parquet/VectorizedParquetRecordReader.java | 4 ---- 2 files changed, 13 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/72999616/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java ---------------------------------------------------------------------- diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java index 46c84c5..2c23ccc 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java @@ -262,6 +262,11 @@ public class VectorizedColumnReader { Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i)); column.putLong(i, CatalystRowConverter.binaryToUnscaledLong(v)); } + } else if (DecimalType.isByteArrayDecimalType(column.dataType())) { + for (int i = rowId; i < rowId + num; ++i) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i)); + column.putByteArray(i, v.getBytes()); + } } else { throw new NotImplementedException(); } @@ -368,6 +373,14 @@ public class VectorizedColumnReader { column.putNull(rowId + i); } } + } else if (DecimalType.isByteArrayDecimalType(column.dataType())) { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + column.putByteArray(rowId + i, data.readBinary(arrayLen).getBytes()); + } else { + column.putNull(rowId + i); + } + } } else { throw new NotImplementedException("Unimplemented type: " + column.dataType()); } http://git-wip-us.apache.org/repos/asf/spark/blob/72999616/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java ---------------------------------------------------------------------- diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java index ef44b62..9db5c41 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java @@ -220,10 +220,6 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa originalTypes[i] != OriginalType.INT_8 && originalTypes[i] != OriginalType.INT_16) { throw new IOException("Unsupported type: " + t); } - if (originalTypes[i] == OriginalType.DECIMAL && - primitiveType.getDecimalMetadata().getPrecision() > Decimal.MAX_LONG_DIGITS()) { - throw new IOException("Decimal with high precision is not supported."); - } if (primitiveType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT96) { throw new IOException("Int96 not supported."); } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
