[2/2] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-02-12 Thread jackylk
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

Pick up the no-sort fields in the row, pack them into a byte array, and skip
parsing them during merge sort to reduce CPU consumption.

This closes #1792


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/937bdb86
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/937bdb86
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/937bdb86

Branch: refs/heads/carbonstore
Commit: 937bdb867aeac5159f51de3582c4556c949bfb5c
Parents: 0d50f65
Author: xuchuanyin 
Authored: Thu Feb 8 14:35:14 2018 +0800
Committer: Jacky Li 
Committed: Mon Feb 12 16:13:21 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 +--
 .../presto/util/CarbonDataStoreCreator.scala|   1 -
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 +
 .../loading/sort/SortStepRowHandler.java| 466 +++
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++---
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 +--
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 ---
 .../UnsafeRowComparatorForNormalDims.java   |  59 +++
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 ++
 .../merger/UnsafeIntermediateFileMerger.java| 118 +
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 -
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +---
 .../IntermediateSortTempRowComparator.java  |  73 +++
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 ---
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +---
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 ++-
 .../sort/sortdata/TableFieldStat.java   | 176 +++
 28 files changed, 1186 insertions(+), 1294 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/937bdb86/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index d6ecfbc..fca1244 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,18 +82,26 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required Dimension from obj []
+   * Method to get the required dictionary Dimension from obj []
*
* @param index
* @param row
* @return
*/
-  public static Integer getDimension(int index, Object[] row) {
-
-Integer[] dimensions = (Integer[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
-
+  public static int getDictDimension(int index, Object[] row) {
+int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
 return dimensions[index];
+  }
 
+  /**
+   * Method to get the required non-dictionary & complex from 3-parted row
+   * @param index
+   * @param row
+   * @return
+   */
+  public static byte[] getNoDictOrComplex(int index, Object[] row) {
+byte[][] nonDictArray = (byte[][]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+return nonDictArray[index];
   }
 
   /**
@@ -108,60 +116,11 @@ public class NonDictionaryUtil {
 return measures[index];
   }
 
-  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
-
-return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-  }
-
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] byteBufferArr,
   Object[] measureArray) {
-
 out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
 out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
 out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
-
-  /**
-   * This method will extract the single dimension from the complete high card dims byte[].+ *
-   * The format of the byte [] will be,  Totallength,CompleteStartOffsets,Dat
-   *
-   * @param highCardArr
-   * @param index
-   * @param highCardinalityCount
-   * 

[2/2] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-02-08 Thread jackylk
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

Pick up the no-sort fields in the row, pack them into a byte array, and skip
parsing them during merge sort to reduce CPU consumption.

This closes #1792


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/de92ea9a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/de92ea9a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/de92ea9a

Branch: refs/heads/carbonstore
Commit: de92ea9a123b17d903f2d1d4662299315c792954
Parents: cd7eed6
Author: xuchuanyin 
Authored: Thu Feb 8 14:35:14 2018 +0800
Committer: Jacky Li 
Committed: Fri Feb 9 01:01:22 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 +--
 .../presto/util/CarbonDataStoreCreator.scala|   1 -
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 +
 .../loading/sort/SortStepRowHandler.java| 466 +++
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++---
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 +--
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 ---
 .../UnsafeRowComparatorForNormalDims.java   |  59 +++
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 ++
 .../merger/UnsafeIntermediateFileMerger.java| 118 +
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 -
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +---
 .../IntermediateSortTempRowComparator.java  |  73 +++
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 ---
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +---
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 ++-
 .../sort/sortdata/TableFieldStat.java   | 176 +++
 28 files changed, 1186 insertions(+), 1294 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/de92ea9a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index d6ecfbc..fca1244 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,18 +82,26 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required Dimension from obj []
+   * Method to get the required dictionary Dimension from obj []
*
* @param index
* @param row
* @return
*/
-  public static Integer getDimension(int index, Object[] row) {
-
-Integer[] dimensions = (Integer[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
-
+  public static int getDictDimension(int index, Object[] row) {
+int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
 return dimensions[index];
+  }
 
+  /**
+   * Method to get the required non-dictionary & complex from 3-parted row
+   * @param index
+   * @param row
+   * @return
+   */
+  public static byte[] getNoDictOrComplex(int index, Object[] row) {
+byte[][] nonDictArray = (byte[][]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+return nonDictArray[index];
   }
 
   /**
@@ -108,60 +116,11 @@ public class NonDictionaryUtil {
 return measures[index];
   }
 
-  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
-
-return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-  }
-
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] byteBufferArr,
   Object[] measureArray) {
-
 out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
 out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
 out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
-
-  /**
-   * This method will extract the single dimension from the complete high card dims byte[].+ *
-   * The format of the byte [] will be,  Totallength,CompleteStartOffsets,Dat
-   *
-   * @param highCardArr
-   * @param index
-   * @param highCardinalityCount
-   *