[37/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row Pick up the no-sort fields in the row and pack them as bytes array and skip parsing them during merge sort to reduce CPU consumption This closes #1792 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/28b5720f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/28b5720f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/28b5720f Branch: refs/heads/carbonstore Commit: 28b5720fcf1cbd0d4bdf3f04e7b0edd8f9492a8d Parents: dcfe73b Author: xuchuanyin Authored: Thu Feb 8 14:35:14 2018 +0800 Committer: Jacky Li Committed: Sun Mar 4 20:32:13 2018 +0800 -- .../carbondata/core/util/NonDictionaryUtil.java | 67 +-- .../presto/util/CarbonDataStoreCreator.scala| 1 - .../load/DataLoadProcessorStepOnSpark.scala | 6 +- .../loading/row/IntermediateSortTempRow.java| 117 + .../loading/sort/SortStepRowHandler.java| 466 +++ .../loading/sort/SortStepRowUtil.java | 103 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++--- .../loading/sort/unsafe/UnsafeSortDataRows.java | 57 +-- .../unsafe/comparator/UnsafeRowComparator.java | 95 ++-- .../UnsafeRowComparatorForNormalDIms.java | 59 --- .../UnsafeRowComparatorForNormalDims.java | 59 +++ .../sort/unsafe/holder/SortTempChunkHolder.java | 3 +- .../holder/UnsafeFinalMergePageHolder.java | 19 +- .../unsafe/holder/UnsafeInmemoryHolder.java | 21 +- .../holder/UnsafeSortTempFileChunkHolder.java | 138 ++ .../merger/UnsafeIntermediateFileMerger.java| 118 + .../UnsafeSingleThreadFinalSortFilesMerger.java | 27 +- .../merger/CompactionResultSortProcessor.java | 1 - .../sort/sortdata/IntermediateFileMerger.java | 95 +--- .../IntermediateSortTempRowComparator.java | 73 +++ .../sort/sortdata/NewRowComparator.java | 5 +- .../sortdata/NewRowComparatorForNormalDims.java | 3 +- .../processing/sort/sortdata/RowComparator.java | 94 .../sortdata/RowComparatorForNormalDims.java| 62 --- .../SingleThreadFinalSortFilesMerger.java | 25 +- .../processing/sort/sortdata/SortDataRows.java | 85 +--- .../sort/sortdata/SortTempFileChunkHolder.java | 174 ++- .../sort/sortdata/TableFieldStat.java | 176 +++ 28 files changed, 1186 insertions(+), 1294 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/28b5720f/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java index d6ecfbc..fca1244 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java @@ -82,18 +82,26 @@ public class NonDictionaryUtil { } /** - * Method to get the required Dimension from obj [] + * Method to get the required dictionary Dimension from obj [] * * @param index * @param row * @return */ - public static Integer getDimension(int index, Object[] row) { - -Integer[] dimensions = (Integer[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION]; - + public static int getDictDimension(int index, Object[] row) { +int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION]; return dimensions[index]; + } + /** + * Method to get the required non-dictionary & complex from 3-parted row + * @param index + * @param row + * @return + */ + public static byte[] getNoDictOrComplex(int index, Object[] row) { +byte[][] nonDictArray = (byte[][]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX]; +return nonDictArray[index]; } /** @@ -108,60 +116,11 @@ public class NonDictionaryUtil { return measures[index]; } - public static byte[] getByteArrayForNoDictionaryCols(Object[] row) { - -return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX]; - } - public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] byteBufferArr, Object[] measureArray) { - out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray; out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr; out[WriteStepRowUtil.MEASURE] = measureArray; } - - /** - * This method will extract the single dimension from the complete high card dims byte[].+ * - * The format of the byte [] will be, Totallength,CompleteStartOffsets,Dat - * - * @param highCardArr - * @param index - * @param highCardinalityCount - * @param outBuffer - */ - public static void extr
[37/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row Pick up the no-sort fields in the row and pack them as bytes array and skip parsing them during merge sort to reduce CPU consumption This closes #1792 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/28b5720f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/28b5720f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/28b5720f Branch: refs/heads/carbonstore-rebase5 Commit: 28b5720fcf1cbd0d4bdf3f04e7b0edd8f9492a8d Parents: dcfe73b Author: xuchuanyin Authored: Thu Feb 8 14:35:14 2018 +0800 Committer: Jacky Li Committed: Sun Mar 4 20:32:13 2018 +0800 -- .../carbondata/core/util/NonDictionaryUtil.java | 67 +-- .../presto/util/CarbonDataStoreCreator.scala| 1 - .../load/DataLoadProcessorStepOnSpark.scala | 6 +- .../loading/row/IntermediateSortTempRow.java| 117 + .../loading/sort/SortStepRowHandler.java| 466 +++ .../loading/sort/SortStepRowUtil.java | 103 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++--- .../loading/sort/unsafe/UnsafeSortDataRows.java | 57 +-- .../unsafe/comparator/UnsafeRowComparator.java | 95 ++-- .../UnsafeRowComparatorForNormalDIms.java | 59 --- .../UnsafeRowComparatorForNormalDims.java | 59 +++ .../sort/unsafe/holder/SortTempChunkHolder.java | 3 +- .../holder/UnsafeFinalMergePageHolder.java | 19 +- .../unsafe/holder/UnsafeInmemoryHolder.java | 21 +- .../holder/UnsafeSortTempFileChunkHolder.java | 138 ++ .../merger/UnsafeIntermediateFileMerger.java| 118 + .../UnsafeSingleThreadFinalSortFilesMerger.java | 27 +- .../merger/CompactionResultSortProcessor.java | 1 - .../sort/sortdata/IntermediateFileMerger.java | 95 +--- .../IntermediateSortTempRowComparator.java | 73 +++ .../sort/sortdata/NewRowComparator.java | 5 +- .../sortdata/NewRowComparatorForNormalDims.java | 3 +- .../processing/sort/sortdata/RowComparator.java | 94 .../sortdata/RowComparatorForNormalDims.java| 62 --- .../SingleThreadFinalSortFilesMerger.java | 25 +- .../processing/sort/sortdata/SortDataRows.java | 85 +--- .../sort/sortdata/SortTempFileChunkHolder.java | 174 ++- .../sort/sortdata/TableFieldStat.java | 176 +++ 28 files changed, 1186 insertions(+), 1294 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/28b5720f/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java index d6ecfbc..fca1244 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java @@ -82,18 +82,26 @@ public class NonDictionaryUtil { } /** - * Method to get the required Dimension from obj [] + * Method to get the required dictionary Dimension from obj [] * * @param index * @param row * @return */ - public static Integer getDimension(int index, Object[] row) { - -Integer[] dimensions = (Integer[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION]; - + public static int getDictDimension(int index, Object[] row) { +int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION]; return dimensions[index]; + } + /** + * Method to get the required non-dictionary & complex from 3-parted row + * @param index + * @param row + * @return + */ + public static byte[] getNoDictOrComplex(int index, Object[] row) { +byte[][] nonDictArray = (byte[][]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX]; +return nonDictArray[index]; } /** @@ -108,60 +116,11 @@ public class NonDictionaryUtil { return measures[index]; } - public static byte[] getByteArrayForNoDictionaryCols(Object[] row) { - -return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX]; - } - public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] byteBufferArr, Object[] measureArray) { - out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray; out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr; out[WriteStepRowUtil.MEASURE] = measureArray; } - - /** - * This method will extract the single dimension from the complete high card dims byte[].+ * - * The format of the byte [] will be, Totallength,CompleteStartOffsets,Dat - * - * @param highCardArr - * @param index - * @param highCardinalityCount - * @param outBuffer - */ - public static v
[37/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f9d46b18/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java index 11b3d43..527452a 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java @@ -31,15 +31,14 @@ import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastore.impl.FileFactory; -import org.apache.carbondata.core.metadata.datatype.DataType; -import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.util.CarbonProperties; import org.apache.carbondata.core.util.CarbonUtil; -import org.apache.carbondata.core.util.DataTypeUtil; -import org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage; +import org.apache.carbondata.processing.loading.row.IntermediateSortTempRow; +import org.apache.carbondata.processing.loading.sort.SortStepRowHandler; import org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException; -import org.apache.carbondata.processing.sort.sortdata.NewRowComparator; +import org.apache.carbondata.processing.sort.sortdata.IntermediateSortTempRowComparator; import org.apache.carbondata.processing.sort.sortdata.SortParameters; +import org.apache.carbondata.processing.sort.sortdata.TableFieldStat; public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder { @@ -63,21 +62,15 @@ public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder { * entry count */ private int entryCount; - /** * return row */ - private Object[] returnRow; - private int dimCnt; - private int complexCnt; - private int measureCnt; - private boolean[] isNoDictionaryDimensionColumn; - private DataType[] measureDataTypes; + private IntermediateSortTempRow returnRow; private int readBufferSize; private String compressorName; - private Object[][] currentBuffer; + private IntermediateSortTempRow[] currentBuffer; - private Object[][] backupBuffer; + private IntermediateSortTempRow[] backupBuffer; private boolean isBackupFilled; @@ -100,27 +93,21 @@ public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder { private int numberOfObjectRead; - private int nullSetWordsLength; - - private Comparator comparator; - + private TableFieldStat tableFieldStat; + private SortStepRowHandler sortStepRowHandler; + private Comparator comparator; /** * Constructor to initialize */ public UnsafeSortTempFileChunkHolder(File tempFile, SortParameters parameters) { // set temp file this.tempFile = tempFile; -this.dimCnt = parameters.getDimColCount(); -this.complexCnt = parameters.getComplexDimColCount(); -this.measureCnt = parameters.getMeasureColCount(); -this.isNoDictionaryDimensionColumn = parameters.getNoDictionaryDimnesionColumn(); -this.measureDataTypes = parameters.getMeasureDataType(); this.readBufferSize = parameters.getBufferSize(); this.compressorName = parameters.getSortTempCompressorName(); - +this.tableFieldStat = new TableFieldStat(parameters); +this.sortStepRowHandler = new SortStepRowHandler(tableFieldStat); this.executorService = Executors.newFixedThreadPool(1); -this.nullSetWordsLength = ((parameters.getMeasureColCount() - 1) >> 6) + 1; -comparator = new NewRowComparator(parameters.getNoDictionarySortColumn()); +comparator = new IntermediateSortTempRowComparator(parameters.getNoDictionarySortColumn()); initialize(); } @@ -169,11 +156,17 @@ public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder { * * @throws CarbonSortKeyAndGroupByException problem while reading */ + @Override public void readRow() throws CarbonSortKeyAndGroupByException { if (prefetch) { fillDataForPrefetch(); } else { - this.returnRow = getRowFromStream(); + try { +this.returnRow = sortStepRowHandler.readIntermediateSortTempRowFromInputStream(stream); +this.numberOfObjectRead++; + } catch (IOException e) { +throw new CarbonSortKeyAndGroupByException("Problems while reading row", e); + } } } @@ -207,63 +200,22 @@ public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder { } /** - * @retu