Repository: carbondata Updated Branches: refs/heads/master 6a2a94d05 -> d7913b1fd
[HOTFIX] Fixed data loading failure Problem: 1.Data loading is failing with ArrayIndexOutOfBoundException when all columns are not considered in sort columns. this is because while filling the sort columns details in CarbonDataProcessorUtil.getNoDictSortColMapping it is not checking whether column is present on sort column or not. 2.Some time in CI testcases are failing because of Negative array exception , still it is not clear from the code the main root cause of failure, this failure may happen in customer actual deployment . Solution: 1.Add only those columns which is present in sort columns 2.Currently disabling unsafe default value to false This closes #2754 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d7913b1f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d7913b1f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d7913b1f Branch: refs/heads/master Commit: d7913b1fdbfb9eb080f4f14f2ebff984448710f4 Parents: 6a2a94d Author: kumarvishal09 <[email protected]> Authored: Mon Sep 24 19:34:59 2018 +0530 Committer: Jacky Li <[email protected]> Committed: Wed Sep 26 00:08:00 2018 +0800 ---------------------------------------------------------------------- .../core/constants/CarbonCommonConstants.java | 2 +- ...feVariableLengthDimensionDataChunkStore.java | 2 +- .../load/DataLoadProcessBuilderOnSpark.scala | 3 +- .../sort/sortdata/NewRowComparator.java | 48 +++++++------------- .../processing/sort/sortdata/SortDataRows.java | 7 ++- .../util/CarbonDataProcessorUtil.java | 6 +-- 6 files changed, 26 insertions(+), 42 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 21f1f34..faad0dc 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -1298,7 +1298,7 @@ public final class CarbonCommonConstants { /** * default property of unsafe processing */ - public static final String ENABLE_UNSAFE_IN_QUERY_EXECUTION_DEFAULTVALUE = "true"; + public static final String ENABLE_UNSAFE_IN_QUERY_EXECUTION_DEFAULTVALUE = "false"; /** * whether to prefetch data while loading. http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java index 196dc4c..8553506 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java @@ -139,7 +139,7 @@ public abstract class SafeVariableLengthDimensionDataChunkStore length = dataOffsets[rowId + 1] - (currentDataOffset + getLengthSize()); } else { // for last record - length = (short) (this.data.length - currentDataOffset); + length = this.data.length - currentDataOffset; } DataType dt = vector.getType(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala index e810829..2e74a94 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala @@ -86,8 +86,7 @@ object DataLoadProcessBuilderOnSpark { val sortParameters = SortParameters.createSortParameters(configuration) val rowComparator: Comparator[Array[AnyRef]] = if (sortParameters.getNoDictionaryCount > 0) { - new NewRowComparator(sortParameters.getNoDictionaryDimnesionColumn, - sortParameters.getNoDictionarySortColumn, + new NewRowComparator(sortParameters.getNoDictionarySortColumn, sortParameters.getNoDictDataType) } else { new NewRowComparatorForNormalDims(sortParameters.getDimColCount) http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java index f213764..09b3a59 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java @@ -28,21 +28,12 @@ import org.apache.carbondata.core.util.comparator.SerializableComparator; public class NewRowComparator implements Comparator<Object[]>, Serializable { private static final long serialVersionUID = -1739874611112709436L; - /** - * mapping of dictionary dimensions and no dictionary of sort_column. - */ - private boolean[] noDicDimColMapping; - private DataType[] noDicDataTypes; private boolean[] noDicSortColumnMapping; - /** - * @param noDicDimColMapping - */ - public NewRowComparator(boolean[] noDicDimColMapping, boolean[] noDicSortColumnMapping, + public NewRowComparator(boolean[] noDicSortColumnMapping, DataType[] noDicDataTypes) { - this.noDicDimColMapping = noDicDimColMapping; this.noDicSortColumnMapping = noDicSortColumnMapping; this.noDicDataTypes = noDicDataTypes; } @@ -55,27 +46,23 @@ public class NewRowComparator implements Comparator<Object[]>, Serializable { int index = 0; int dataTypeIdx = 0; int noDicSortIdx = 0; + for (int i = 0; i < noDicSortColumnMapping.length; i++) { + if (noDicSortColumnMapping[noDicSortIdx++]) { + if (DataTypeUtil.isPrimitiveColumn(noDicDataTypes[dataTypeIdx])) { + // use data types based comparator for the no dictionary measure columns + SerializableComparator comparator = org.apache.carbondata.core.util.comparator.Comparator + .getComparator(noDicDataTypes[dataTypeIdx]); + int difference = comparator.compare(rowA[index], rowB[index]); + if (difference != 0) { + return difference; + } + } else { + byte[] byteArr1 = (byte[]) rowA[index]; + byte[] byteArr2 = (byte[]) rowB[index]; - for (int i = 0; i < noDicDimColMapping.length; i++) { - if (noDicDimColMapping[i]) { - if (noDicSortColumnMapping[noDicSortIdx++]) { - if (DataTypeUtil.isPrimitiveColumn(noDicDataTypes[dataTypeIdx])) { - // use data types based comparator for the no dictionary measure columns - SerializableComparator comparator = - org.apache.carbondata.core.util.comparator.Comparator - .getComparator(noDicDataTypes[dataTypeIdx]); - int difference = comparator.compare(rowA[index], rowB[index]); - if (difference != 0) { - return difference; - } - } else { - byte[] byteArr1 = (byte[]) rowA[index]; - byte[] byteArr2 = (byte[]) rowB[index]; - - int difference = UnsafeComparer.INSTANCE.compareTo(byteArr1, byteArr2); - if (difference != 0) { - return difference; - } + int difference = UnsafeComparer.INSTANCE.compareTo(byteArr1, byteArr2); + if (difference != 0) { + return difference; } } dataTypeIdx++; @@ -88,7 +75,6 @@ public class NewRowComparator implements Comparator<Object[]>, Serializable { return diff; } } - index++; } return diff; http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java index 40ff2c9..637741c 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java @@ -204,8 +204,7 @@ public class SortDataRows { toSort = new Object[entryCount][]; System.arraycopy(recordHolderList, 0, toSort, 0, entryCount); if (parameters.getNumberOfNoDictSortColumns() > 0) { - Arrays.sort(toSort, new NewRowComparator(parameters.getNoDictionaryDimnesionColumn(), - parameters.getNoDictionarySortColumn(), + Arrays.sort(toSort, new NewRowComparator(parameters.getNoDictionarySortColumn(), parameters.getNoDictDataType())); } else { Arrays.sort(toSort, new NewRowComparatorForNormalDims(parameters.getNumberOfSortColumns())); @@ -318,8 +317,8 @@ public class SortDataRows { long startTime = System.currentTimeMillis(); if (parameters.getNumberOfNoDictSortColumns() > 0) { Arrays.sort(recordHolderArray, - new NewRowComparator(parameters.getNoDictionaryDimnesionColumn(), - parameters.getNoDictionarySortColumn(), parameters.getNoDictDataType())); + new NewRowComparator(parameters.getNoDictionarySortColumn(), + parameters.getNoDictDataType())); } else { Arrays.sort(recordHolderArray, new NewRowComparatorForNormalDims(parameters.getNumberOfSortColumns())); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java index 3ba1e1d..525f7ee 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java +++ b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java @@ -439,7 +439,7 @@ public final class CarbonDataProcessorUtil { List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); List<DataType> type = new ArrayList<>(); for (int i = 0; i < dimensions.size(); i++) { - if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + if (dimensions.get(i).isSortColumn() && !dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { type.add(dimensions.get(i).getDataType()); } } @@ -458,8 +458,8 @@ public final class CarbonDataProcessorUtil { List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); List<Boolean> noDicSortColMap = new ArrayList<>(); for (int i = 0; i < dimensions.size(); i++) { - if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { - if (dimensions.get(i).isSortColumn()) { + if (dimensions.get(i).isSortColumn()) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { noDicSortColMap.add(true); } else { noDicSortColMap.add(false);
