[2/2] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row. Pick up the no-sort fields in the row, pack them as a byte array, and skip parsing them during merge sort to reduce CPU consumption. This closes #1792. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/937bdb86 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/937bdb86 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/937bdb86 Branch: refs/heads/carbonstore Commit: 937bdb867aeac5159f51de3582c4556c949bfb5c Parents: 0d50f65 Author: xuchuanyin Authored: Thu Feb 8 14:35:14 2018 +0800 Committer: Jacky Li Committed: Mon Feb 12 16:13:21 2018 +0800 -- .../carbondata/core/util/NonDictionaryUtil.java | 67 +-- .../presto/util/CarbonDataStoreCreator.scala| 1 - .../load/DataLoadProcessorStepOnSpark.scala | 6 +- .../loading/row/IntermediateSortTempRow.java| 117 + .../loading/sort/SortStepRowHandler.java| 466 +++ .../loading/sort/SortStepRowUtil.java | 103 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++--- .../loading/sort/unsafe/UnsafeSortDataRows.java | 57 +-- .../unsafe/comparator/UnsafeRowComparator.java | 95 ++-- .../UnsafeRowComparatorForNormalDIms.java | 59 --- .../UnsafeRowComparatorForNormalDims.java | 59 +++ .../sort/unsafe/holder/SortTempChunkHolder.java | 3 +- .../holder/UnsafeFinalMergePageHolder.java | 19 +- .../unsafe/holder/UnsafeInmemoryHolder.java | 21 +- .../holder/UnsafeSortTempFileChunkHolder.java | 138 ++ .../merger/UnsafeIntermediateFileMerger.java| 118 + .../UnsafeSingleThreadFinalSortFilesMerger.java | 27 +- .../merger/CompactionResultSortProcessor.java | 1 - .../sort/sortdata/IntermediateFileMerger.java | 95 +--- .../IntermediateSortTempRowComparator.java | 73 +++ .../sort/sortdata/NewRowComparator.java | 5 +- .../sortdata/NewRowComparatorForNormalDims.java | 3 +- .../processing/sort/sortdata/RowComparator.java | 94 .../sortdata/RowComparatorForNormalDims.java| 62 --- 
.../SingleThreadFinalSortFilesMerger.java | 25 +- .../processing/sort/sortdata/SortDataRows.java | 85 +--- .../sort/sortdata/SortTempFileChunkHolder.java | 174 ++- .../sort/sortdata/TableFieldStat.java | 176 +++ 28 files changed, 1186 insertions(+), 1294 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/937bdb86/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java index d6ecfbc..fca1244 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java @@ -82,18 +82,26 @@ public class NonDictionaryUtil { } /** - * Method to get the required Dimension from obj [] + * Method to get the required dictionary Dimension from obj [] * * @param index * @param row * @return */ - public static Integer getDimension(int index, Object[] row) { - -Integer[] dimensions = (Integer[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION]; - + public static int getDictDimension(int index, Object[] row) { +int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION]; return dimensions[index]; + } + /** + * Method to get the required non-dictionary & complex from 3-parted row + * @param index + * @param row + * @return + */ + public static byte[] getNoDictOrComplex(int index, Object[] row) { +byte[][] nonDictArray = (byte[][]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX]; +return nonDictArray[index]; } /** @@ -108,60 +116,11 @@ public class NonDictionaryUtil { return measures[index]; } - public static byte[] getByteArrayForNoDictionaryCols(Object[] row) { - -return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX]; - } - public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] byteBufferArr, Object[] measureArray) { - out[WriteStepRowUtil.DICTIONARY_DIMENSION] 
= dimArray; out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr; out[WriteStepRowUtil.MEASURE] = measureArray; } - - /** - * This method will extract the single dimension from the complete high card dims byte[].+ * - * The format of the byte [] will be, Totallength,CompleteStartOffsets,Dat - * - * @param highCardArr - * @param index - * @param highCardinalityCount - *
[2/2] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row. Pick up the no-sort fields in the row, pack them as a byte array, and skip parsing them during merge sort to reduce CPU consumption. This closes #1792. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/de92ea9a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/de92ea9a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/de92ea9a Branch: refs/heads/carbonstore Commit: de92ea9a123b17d903f2d1d4662299315c792954 Parents: cd7eed6 Author: xuchuanyin Authored: Thu Feb 8 14:35:14 2018 +0800 Committer: Jacky Li Committed: Fri Feb 9 01:01:22 2018 +0800 -- .../carbondata/core/util/NonDictionaryUtil.java | 67 +-- .../presto/util/CarbonDataStoreCreator.scala| 1 - .../load/DataLoadProcessorStepOnSpark.scala | 6 +- .../loading/row/IntermediateSortTempRow.java| 117 + .../loading/sort/SortStepRowHandler.java| 466 +++ .../loading/sort/SortStepRowUtil.java | 103 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++--- .../loading/sort/unsafe/UnsafeSortDataRows.java | 57 +-- .../unsafe/comparator/UnsafeRowComparator.java | 95 ++-- .../UnsafeRowComparatorForNormalDIms.java | 59 --- .../UnsafeRowComparatorForNormalDims.java | 59 +++ .../sort/unsafe/holder/SortTempChunkHolder.java | 3 +- .../holder/UnsafeFinalMergePageHolder.java | 19 +- .../unsafe/holder/UnsafeInmemoryHolder.java | 21 +- .../holder/UnsafeSortTempFileChunkHolder.java | 138 ++ .../merger/UnsafeIntermediateFileMerger.java| 118 + .../UnsafeSingleThreadFinalSortFilesMerger.java | 27 +- .../merger/CompactionResultSortProcessor.java | 1 - .../sort/sortdata/IntermediateFileMerger.java | 95 +--- .../IntermediateSortTempRowComparator.java | 73 +++ .../sort/sortdata/NewRowComparator.java | 5 +- .../sortdata/NewRowComparatorForNormalDims.java | 3 +- .../processing/sort/sortdata/RowComparator.java | 94 .../sortdata/RowComparatorForNormalDims.java| 62 --- 
.../SingleThreadFinalSortFilesMerger.java | 25 +- .../processing/sort/sortdata/SortDataRows.java | 85 +--- .../sort/sortdata/SortTempFileChunkHolder.java | 174 ++- .../sort/sortdata/TableFieldStat.java | 176 +++ 28 files changed, 1186 insertions(+), 1294 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/de92ea9a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java index d6ecfbc..fca1244 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java @@ -82,18 +82,26 @@ public class NonDictionaryUtil { } /** - * Method to get the required Dimension from obj [] + * Method to get the required dictionary Dimension from obj [] * * @param index * @param row * @return */ - public static Integer getDimension(int index, Object[] row) { - -Integer[] dimensions = (Integer[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION]; - + public static int getDictDimension(int index, Object[] row) { +int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION]; return dimensions[index]; + } + /** + * Method to get the required non-dictionary & complex from 3-parted row + * @param index + * @param row + * @return + */ + public static byte[] getNoDictOrComplex(int index, Object[] row) { +byte[][] nonDictArray = (byte[][]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX]; +return nonDictArray[index]; } /** @@ -108,60 +116,11 @@ public class NonDictionaryUtil { return measures[index]; } - public static byte[] getByteArrayForNoDictionaryCols(Object[] row) { - -return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX]; - } - public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] byteBufferArr, Object[] measureArray) { - out[WriteStepRowUtil.DICTIONARY_DIMENSION] 
= dimArray; out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr; out[WriteStepRowUtil.MEASURE] = measureArray; } - - /** - * This method will extract the single dimension from the complete high card dims byte[].+ * - * The format of the byte [] will be, Totallength,CompleteStartOffsets,Dat - * - * @param highCardArr - * @param index - * @param highCardinalityCount - *