[CARBONDATA-1946] Exception thrown after alter data type change operation on dictionary exclude integer type column
Problem: After a change-data-type restructure operation (INT to BIGINT) on a dictionary exclude INT type column, if a select query is triggered then an exception is thrown. Analysis: This happens because, while retrieving the data, the vector is created for the BIGINT type (size 8 bytes), but the actual length of each data value is 4 bytes, and the length check performed while reading the data therefore fails. Solution: Added a new restructuredType variable in vector and assigned the block dimension data type to it. This closes #1732 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1f54c472 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1f54c472 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1f54c472 Branch: refs/heads/branch-1.3 Commit: 1f54c47282bc201f2071bc8c9cc1be19baf0c9a1 Parents: 38038ad Author: manishgupta88 <[email protected]> Authored: Wed Dec 27 23:09:58 2017 +0530 Committer: ravipesala <[email protected]> Committed: Thu Jan 4 20:31:10 2018 +0530 ---------------------------------------------------------------------- ...feVariableLengthDimensionDataChunkStore.java | 6 +++-- ...afeVariableLengthDimesionDataChunkStore.java | 6 +++-- .../DictionaryBasedVectorResultCollector.java | 4 ++++ .../scan/executor/util/RestructureUtil.java | 2 -- .../scan/result/vector/CarbonColumnVector.java | 14 ++++++++++++ .../vector/impl/CarbonColumnVectorImpl.java | 12 ++++++++++ .../carbondata/core/util/DataTypeUtil.java | 23 +++++++++++++++++++- .../presto/CarbonColumnVectorWrapper.java | 12 ++++++++++ .../vectorreader/ColumnarVectorWrapper.java | 12 ++++++++++ .../vectorreader/ChangeDataTypeTestCases.scala | 15 +++++++++++++ 10 files changed, 99 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java index f0d18dc..db83198 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java @@ -24,7 +24,7 @@ import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.util.ByteUtil; - +import org.apache.carbondata.core.util.DataTypeUtil; /** * Below class is responsible to store variable length dimension data chunk in @@ -153,7 +153,9 @@ public class SafeVariableLengthDimensionDataChunkStore extends SafeAbsractDimens } else if (dt == DataTypes.INT) { vector.putInt(vectorRow, ByteUtil.toInt(data, currentDataOffset, length)); } else if (dt == DataTypes.LONG) { - vector.putLong(vectorRow, ByteUtil.toLong(data, currentDataOffset, length)); + vector.putLong(vectorRow, DataTypeUtil + .getDataBasedOnRestructuredDataType(data, vector.getBlockDataType(), + currentDataOffset, length)); } else if (dt == DataTypes.TIMESTAMP) { vector.putLong(vectorRow, ByteUtil.toLong(data, currentDataOffset, length) * 1000L); } 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java index d6af052..36b2bd8 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java @@ -25,7 +25,7 @@ import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.util.ByteUtil; - +import org.apache.carbondata.core.util.DataTypeUtil; /** * Below class is responsible to store variable length dimension data chunk in @@ -177,7 +177,9 @@ public class UnsafeVariableLengthDimesionDataChunkStore } else if (dt == DataTypes.INT) { vector.putInt(vectorRow, ByteUtil.toInt(value, 0, value.length)); } else if (dt == DataTypes.LONG) { - vector.putLong(vectorRow, ByteUtil.toLong(value, 0, value.length)); + vector.putLong(vectorRow, DataTypeUtil + .getDataBasedOnRestructuredDataType(value, vector.getBlockDataType(), 0, + value.length)); } else if (dt == DataTypes.TIMESTAMP) { vector.putLong(vectorRow, ByteUtil.toLong(value, 0, value.length) * 1000L); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java index 10888fe..5e6c99a 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java @@ -181,6 +181,10 @@ public class DictionaryBasedVectorResultCollector extends AbstractScannedResultC allColumnInfo[i].offset = rowCounter; allColumnInfo[i].vectorOffset = columnarBatch.getRowCounter(); allColumnInfo[i].vector = columnarBatch.columnVectors[i]; + if (null != allColumnInfo[i].dimension) { + allColumnInfo[i].vector + .setBlockDataType(allColumnInfo[i].dimension.getDimension().getDataType()); + } } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java index b3a77b8..572400d 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java @@ -83,8 +83,6 @@ public class RestructureUtil { if (tableDimension.getColumnId().equals(queryDimension.getDimension().getColumnId())) { QueryDimension currentBlockDimension = new QueryDimension(tableDimension.getColName()); tableDimension.getColumnSchema() - .setDataType(queryDimension.getDimension().getDataType()); - tableDimension.getColumnSchema() 
.setPrecision(queryDimension.getDimension().getColumnSchema().getPrecision()); tableDimension.getColumnSchema() .setScale(queryDimension.getDimension().getColumnSchema().getScale()); http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java index 40a52e3..b606a50 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java @@ -67,6 +67,20 @@ public interface CarbonColumnVector { DataType getType(); + /** + * Method to be used for getting the restructured data type. This method will used for + * retrieving the data after change in data type restructure operation + * + * @return + */ + DataType getBlockDataType(); + + /** + * Method to be used for setting the restructured data type. 
This method will used for + * retrieving the data after change in data type restructure operation + */ + void setBlockDataType(DataType blockDataType); + void setFilteredRowsExist(boolean filteredRowsExist); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java index 5f8233c..e431aaf 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java @@ -51,6 +51,8 @@ public class CarbonColumnVectorImpl implements CarbonColumnVector { private DataType dataType; + private DataType blockDataType; + /** * True if there is at least one NULL byte set. This is an optimization for the writer, to skip * having to clear NULL bits. 
@@ -235,6 +237,16 @@ public class CarbonColumnVectorImpl implements CarbonColumnVector { return dataType; } + @Override + public DataType getBlockDataType() { + return blockDataType; + } + + @Override + public void setBlockDataType(DataType blockDataType) { + this.blockDataType = blockDataType; + } + @Override public void setFilteredRowsExist(boolean filteredRowsExist) { } http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java index 65ea63e..5d188b5 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java @@ -775,4 +775,25 @@ public final class DataTypeUtil { } } -} \ No newline at end of file + /** + * Method to type case the data based on modified data type. 
This method will used for + * retrieving the data after change in data type restructure operation + * + * @param data + * @param restructuredDataType + * @param currentDataOffset + * @param length + * @return + */ + public static long getDataBasedOnRestructuredDataType(byte[] data, DataType restructuredDataType, + int currentDataOffset, int length) { + long value = 0L; + if (restructuredDataType == DataTypes.INT) { + value = ByteUtil.toInt(data, currentDataOffset, length); + } else if (restructuredDataType == DataTypes.LONG) { + value = ByteUtil.toLong(data, currentDataOffset, length); + } + return value; + } + +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java ---------------------------------------------------------------------- diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java index e19a598..78a1ea8 100644 --- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java +++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java @@ -33,6 +33,8 @@ public class CarbonColumnVectorWrapper implements CarbonColumnVector { private boolean filteredRowsExist; + private DataType blockDataType; + public CarbonColumnVectorWrapper(CarbonColumnVectorImpl columnVector, boolean[] filteredRows) { this.columnVector = columnVector; this.filteredRows = filteredRows; @@ -203,6 +205,16 @@ public class CarbonColumnVectorWrapper implements CarbonColumnVector { return columnVector.getType(); } + @Override + public DataType getBlockDataType() { + return blockDataType; + } + + @Override + public void setBlockDataType(DataType blockDataType) { + this.blockDataType = blockDataType; + } + @Override public void setFilteredRowsExist(boolean filteredRowsExist) { 
this.filteredRowsExist = filteredRowsExist; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java b/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java index 9387276..7d42130 100644 --- a/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java +++ b/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java @@ -38,6 +38,8 @@ class ColumnarVectorWrapper implements CarbonColumnVector { private DataType dataType; + private DataType blockDataType; + public ColumnarVectorWrapper(ColumnVector columnVector, boolean[] filteredRows) { this.columnVector = columnVector; this.filteredRows = filteredRows; @@ -211,6 +213,16 @@ class ColumnarVectorWrapper implements CarbonColumnVector { return dataType; } + @Override + public DataType getBlockDataType() { + return blockDataType; + } + + @Override + public void setBlockDataType(DataType blockDataType) { + this.blockDataType = blockDataType; + } + @Override public void setFilteredRowsExist(boolean filteredRowsExist) { this.filteredRowsExist = filteredRowsExist; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala index 16c0895..0124716 100644 --- 
a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala @@ -164,6 +164,21 @@ class ChangeDataTypeTestCases extends Spark2QueryTest with BeforeAndAfterAll { sql("drop table if exists PreAggMain_preagg1") } + test("test data type change for dictionary exclude INT type column") { + sql("drop table if exists table_sort") + sql("CREATE TABLE table_sort (imei int,age int,mac string) STORED BY 'carbondata' TBLPROPERTIES('DICTIONARY_EXCLUDE'='imei,age','SORT_COLUMNS'='imei,age')") + sql("insert into table_sort select 32674,32794,'MAC1'") + sql("alter table table_sort change age age bigint") + sql("insert into table_sort select 32675,9223372036854775807,'MAC2'") + try { + sqlContext.setConf("carbon.enable.vector.reader", "true") + checkAnswer(sql("select * from table_sort"), + Seq(Row(32674, 32794, "MAC1"), Row(32675, Long.MaxValue, "MAC2"))) + } finally { + sqlContext.setConf("carbon.enable.vector.reader", "true") + } + } + override def afterAll { sql("DROP TABLE IF EXISTS changedatatypetest") sql("DROP TABLE IF EXISTS hivetable")
