Repository: carbondata Updated Branches: refs/heads/master c7c83684b -> fa9a4eeeb
[CARBONDATA-3022] Refactor ColumnPageWrapper Refactor ColumnPageWrapper for better filter query performance. Removed unnecessary checks and loops This closes #2808 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fa9a4eee Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fa9a4eee Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fa9a4eee Branch: refs/heads/master Commit: fa9a4eeeb489b77b3040d54e7878bac93ccb12aa Parents: c7c8368 Author: dhatchayani <dhatcha.offic...@gmail.com> Authored: Wed Oct 10 13:18:01 2018 +0530 Committer: manishgupta88 <tomanishgupt...@gmail.com> Committed: Tue Oct 23 15:12:21 2018 +0530 ---------------------------------------------------------------------- .../chunk/store/ColumnPageWrapper.java | 126 +++++-------------- .../core/scan/executor/util/QueryUtil.java | 32 ----- .../carbondata/core/util/DataTypeUtil.java | 18 --- 3 files changed, 32 insertions(+), 144 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9a4eee/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java index 627c75f..ba853f9 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java @@ -26,7 +26,6 @@ import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; -import org.apache.carbondata.core.scan.executor.util.QueryUtil; import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.scan.result.vector.CarbonDictionary; import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; @@ -83,31 +82,6 @@ public class ColumnPageWrapper implements DimensionColumnPage { return chunkIndex + 1; } - /** - * Fill the data to the vector - * - * @param rowId - * @param vector - * @param vectorRow - */ - private void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) { - if (columnPage.getNullBits().get(rowId) - && columnPage.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { - // if this row is null, return default null represent in byte array - byte[] value = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; - QueryUtil.putDataToVector(vector, value, vectorRow, value.length); - } else if (columnPage.getNullBits().get(rowId)) { - // if this row is null, return default null represent in byte array - byte[] value = CarbonCommonConstants.EMPTY_BYTE_ARRAY; - QueryUtil.putDataToVector(vector, value, vectorRow, value.length); - } else { - if (isExplicitSorted) { - rowId = invertedReverseIndex[rowId]; - } - QueryUtil.putDataToVector(vector, getActualData(rowId, true), vectorRow); - } - } - @Override public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int chunkIndex) { ColumnVectorInfo columnVectorInfo = vectorInfo[chunkIndex]; @@ -214,78 +188,42 @@ public class ColumnPageWrapper implements DimensionColumnPage { return null; } - private Object getActualData(int rowId, boolean isRowIdChanged) { - ColumnType columnType = columnPage.getColumnSpec().getColumnType(); - DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); - DataType targetDataType = columnPage.getDataType(); - if (null != localDictionary) { - return localDictionary - .getDictionaryValue(CarbonUtil.getSurrogateInternal(columnPage.getBytes(rowId), 0, 3)); - } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && this.isAdaptiveEncoded()) || ( - columnType == ColumnType.PLAIN_VALUE && DataTypeUtil.isPrimitiveColumn(srcDataType))) { - if (!isRowIdChanged && columnPage.getNullBits().get(rowId) - && columnType == ColumnType.COMPLEX_PRIMITIVE) { - // if this row is null, return default null represent in byte array - return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; - } - if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) { - // if this row is null, return default null represent in byte array - return CarbonCommonConstants.EMPTY_BYTE_ARRAY; + /** + * Fill the data to the vector + * + * @param rowId + * @param vector + * @param vectorRow + */ + private void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) { + if (columnPage.getNullBits().get(rowId)) { + vector.putNull(vectorRow); + } else { + if (isExplicitSorted) { + rowId = invertedReverseIndex[rowId]; } - if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) { - double doubleData = columnPage.getDouble(rowId); - if (srcDataType == DataTypes.FLOAT) { - return (float) doubleData; - } else { - return doubleData; - } - } else if (DataTypes.isDecimal(srcDataType)) { - throw new RuntimeException("unsupported type: " + srcDataType); - } else if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN) || ( - srcDataType == DataTypes.SHORT) || (srcDataType == DataTypes.SHORT_INT) || (srcDataType - == DataTypes.INT) || (srcDataType == DataTypes.LONG) || (srcDataType - == DataTypes.TIMESTAMP)) { - long longData = columnPage.getLong(rowId); - if ((srcDataType == DataTypes.BYTE)) { - return (byte) longData; - } else if (srcDataType == DataTypes.BOOLEAN) { - byte out = (byte) longData; - return ByteUtil.toBoolean(out); - } else if (srcDataType == DataTypes.SHORT) { - return (short) longData; - } else if (srcDataType == DataTypes.SHORT_INT) { - return (int) longData; - } else if (srcDataType == DataTypes.INT) { - return (int) longData; - } else { - // timestamp and long - return longData; + DataType dt = vector.getType(); + long longData = columnPage.getLong(rowId); + if (dt == DataTypes.BOOLEAN) { + vector.putBoolean(vectorRow, ByteUtil.toBoolean((byte) longData)); + } else if (dt == DataTypes.BYTE) { + vector.putByte(vectorRow, (byte) longData); + } else if (dt == DataTypes.SHORT) { + vector.putShort(vectorRow, (short) longData); + } else if (dt == DataTypes.INT) { + vector.putInt(vectorRow, (int) longData); + } else if (dt == DataTypes.LONG) { + // retrieving the data after change in data type restructure operation + if (vector.getBlockDataType() == DataTypes.INT) { + vector.putLong(vectorRow, (int) longData); + } else if (vector.getBlockDataType() == DataTypes.LONG) { + vector.putLong(vectorRow, longData); } - } else if ((targetDataType == DataTypes.STRING) || (targetDataType == DataTypes.VARCHAR) || ( - targetDataType == DataTypes.BYTE_ARRAY)) { - return columnPage.getBytes(rowId); - } else { - throw new RuntimeException("unsupported type: " + targetDataType); - } - } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && !this.isAdaptiveEncoded())) { - if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) { - return CarbonCommonConstants.EMPTY_BYTE_ARRAY; - } - if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN)) { - byte[] out = new byte[1]; - out[0] = (columnPage.getByte(rowId)); - return ByteUtil.toBoolean(out); - } else if (srcDataType == DataTypes.BYTE_ARRAY) { - return columnPage.getBytes(rowId); - } else if (srcDataType == DataTypes.DOUBLE) { - return columnPage.getDouble(rowId); - } else if (srcDataType == targetDataType) { - return columnPage.getBytes(rowId); + } else if (dt == DataTypes.TIMESTAMP) { + vector.putLong(vectorRow, longData * 1000L); } else { - throw new RuntimeException("unsupported type: " + targetDataType); + throw new RuntimeException("unsupported type: " + dt); } - } else { - return columnPage.getBytes(rowId); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9a4eee/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java index 7849d10..22e1e72 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java @@ -777,36 +777,4 @@ public class QueryUtil { } } } - - /** - * Put the data to vector - * - * @param vector - * @param value - * @param vectorRow - */ - public static void putDataToVector(CarbonColumnVector vector, Object value, int vectorRow) { - DataType dt = vector.getType(); - if (value.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY) || value - .equals(CarbonCommonConstants.EMPTY_BYTE_ARRAY)) { - vector.putNull(vectorRow); - } else { - if (dt == DataTypes.STRING) { - vector.putBytes(vectorRow, (byte[]) value); - } else if (dt == DataTypes.BOOLEAN) { - vector.putBoolean(vectorRow, (boolean) value); - } else if (dt == DataTypes.BYTE) { - vector.putByte(vectorRow, (byte) value); - } else if (dt == DataTypes.SHORT) { - vector.putShort(vectorRow, (short) value); - } else if (dt == DataTypes.INT) { - vector.putInt(vectorRow, (int) value); - } else if (dt == DataTypes.LONG) { - vector.putLong(vectorRow, - DataTypeUtil.getDataBasedOnRestructuredDataType(value, vector.getBlockDataType())); - } else if (dt == DataTypes.TIMESTAMP) { - vector.putLong(vectorRow, (long) value * 1000L); - } - } - } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9a4eee/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java index 66faf20..8f05f39 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java @@ -1085,24 +1085,6 @@ public final class DataTypeUtil { } /** - * Method to type case the data based on modified data type. This method will used for - * retrieving the data after change in data type restructure operation - * - * @param data - * @param restructureDataType - * @return - */ - public static long getDataBasedOnRestructuredDataType(Object data, DataType restructureDataType) { - long value = 0L; - if (restructureDataType == DataTypes.INT) { - value = (int) data; - } else if (restructureDataType == DataTypes.LONG) { - value = (long) data; - } - return value; - } - - /** * Check if the column is a no dictionary primitive column * * @param dataType