[37/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-03-04 Thread jackylk
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

Pick up the no-sort fields in the row and pack them as bytes array and skip 
parsing them during merge sort to reduce CPU consumption

This closes #1792


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/28b5720f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/28b5720f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/28b5720f

Branch: refs/heads/carbonstore
Commit: 28b5720fcf1cbd0d4bdf3f04e7b0edd8f9492a8d
Parents: dcfe73b
Author: xuchuanyin 
Authored: Thu Feb 8 14:35:14 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 +--
 .../presto/util/CarbonDataStoreCreator.scala|   1 -
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 +
 .../loading/sort/SortStepRowHandler.java| 466 +++
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++---
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 +--
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 ---
 .../UnsafeRowComparatorForNormalDims.java   |  59 +++
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 ++
 .../merger/UnsafeIntermediateFileMerger.java| 118 +
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 -
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +---
 .../IntermediateSortTempRowComparator.java  |  73 +++
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 ---
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +---
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 ++-
 .../sort/sortdata/TableFieldStat.java   | 176 +++
 28 files changed, 1186 insertions(+), 1294 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/28b5720f/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index d6ecfbc..fca1244 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,18 +82,26 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required Dimension from obj []
+   * Method to get the required dictionary Dimension from obj []
*
* @param index
* @param row
* @return
*/
-  public static Integer getDimension(int index, Object[] row) {
-
-Integer[] dimensions = (Integer[]) 
row[WriteStepRowUtil.DICTIONARY_DIMENSION];
-
+  public static int getDictDimension(int index, Object[] row) {
+int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
 return dimensions[index];
+  }
 
+  /**
+   * Method to get the required non-dictionary & complex from 3-parted row
+   * @param index
+   * @param row
+   * @return
+   */
+  public static byte[] getNoDictOrComplex(int index, Object[] row) {
+byte[][] nonDictArray = (byte[][]) 
row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+return nonDictArray[index];
   }
 
   /**
@@ -108,60 +116,11 @@ public class NonDictionaryUtil {
 return measures[index];
   }
 
-  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
-
-return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-  }
-
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] 
byteBufferArr,
   Object[] measureArray) {
-
 out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
 out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
 out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
-
-  /**
-   * This method will extract the single dimension from the complete high card 
dims byte[].+ *
-   * The format of the byte [] will be,  Totallength,CompleteStartOffsets,Dat
-   *
-   * @param highCardArr
-   * @param index
-   * @param highCardinalityCount
-   * @param outBuffer
-   */
-  public static void extr

[37/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-03-04 Thread jackylk
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

Pick up the no-sort fields in the row and pack them as bytes array and skip 
parsing them during merge sort to reduce CPU consumption

This closes #1792


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/28b5720f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/28b5720f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/28b5720f

Branch: refs/heads/carbonstore-rebase5
Commit: 28b5720fcf1cbd0d4bdf3f04e7b0edd8f9492a8d
Parents: dcfe73b
Author: xuchuanyin 
Authored: Thu Feb 8 14:35:14 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 +--
 .../presto/util/CarbonDataStoreCreator.scala|   1 -
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 +
 .../loading/sort/SortStepRowHandler.java| 466 +++
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++---
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 +--
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 ---
 .../UnsafeRowComparatorForNormalDims.java   |  59 +++
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 ++
 .../merger/UnsafeIntermediateFileMerger.java| 118 +
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 -
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +---
 .../IntermediateSortTempRowComparator.java  |  73 +++
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 ---
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +---
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 ++-
 .../sort/sortdata/TableFieldStat.java   | 176 +++
 28 files changed, 1186 insertions(+), 1294 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/28b5720f/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index d6ecfbc..fca1244 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,18 +82,26 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required Dimension from obj []
+   * Method to get the required dictionary Dimension from obj []
*
* @param index
* @param row
* @return
*/
-  public static Integer getDimension(int index, Object[] row) {
-
-Integer[] dimensions = (Integer[]) 
row[WriteStepRowUtil.DICTIONARY_DIMENSION];
-
+  public static int getDictDimension(int index, Object[] row) {
+int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
 return dimensions[index];
+  }
 
+  /**
+   * Method to get the required non-dictionary & complex from 3-parted row
+   * @param index
+   * @param row
+   * @return
+   */
+  public static byte[] getNoDictOrComplex(int index, Object[] row) {
+byte[][] nonDictArray = (byte[][]) 
row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+return nonDictArray[index];
   }
 
   /**
@@ -108,60 +116,11 @@ public class NonDictionaryUtil {
 return measures[index];
   }
 
-  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
-
-return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-  }
-
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] 
byteBufferArr,
   Object[] measureArray) {
-
 out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
 out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
 out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
-
-  /**
-   * This method will extract the single dimension from the complete high card 
dims byte[].+ *
-   * The format of the byte [] will be,  Totallength,CompleteStartOffsets,Dat
-   *
-   * @param highCardArr
-   * @param index
-   * @param highCardinalityCount
-   * @param outBuffer
-   */
-  public static v

[37/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-03-02 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f9d46b18/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
index 11b3d43..527452a 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
@@ -31,15 +31,14 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.core.util.DataTypeUtil;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage;
+import org.apache.carbondata.processing.loading.row.IntermediateSortTempRow;
+import org.apache.carbondata.processing.loading.sort.SortStepRowHandler;
 import 
org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException;
-import org.apache.carbondata.processing.sort.sortdata.NewRowComparator;
+import 
org.apache.carbondata.processing.sort.sortdata.IntermediateSortTempRowComparator;
 import org.apache.carbondata.processing.sort.sortdata.SortParameters;
+import org.apache.carbondata.processing.sort.sortdata.TableFieldStat;
 
 public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
 
@@ -63,21 +62,15 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
* entry count
*/
   private int entryCount;
-
   /**
* return row
*/
-  private Object[] returnRow;
-  private int dimCnt;
-  private int complexCnt;
-  private int measureCnt;
-  private boolean[] isNoDictionaryDimensionColumn;
-  private DataType[] measureDataTypes;
+  private IntermediateSortTempRow returnRow;
   private int readBufferSize;
   private String compressorName;
-  private Object[][] currentBuffer;
+  private IntermediateSortTempRow[] currentBuffer;
 
-  private Object[][] backupBuffer;
+  private IntermediateSortTempRow[] backupBuffer;
 
   private boolean isBackupFilled;
 
@@ -100,27 +93,21 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
 
   private int numberOfObjectRead;
 
-  private int nullSetWordsLength;
-
-  private Comparator comparator;
-
+  private TableFieldStat tableFieldStat;
+  private SortStepRowHandler sortStepRowHandler;
+  private Comparator comparator;
   /**
* Constructor to initialize
*/
   public UnsafeSortTempFileChunkHolder(File tempFile, SortParameters 
parameters) {
 // set temp file
 this.tempFile = tempFile;
-this.dimCnt = parameters.getDimColCount();
-this.complexCnt = parameters.getComplexDimColCount();
-this.measureCnt = parameters.getMeasureColCount();
-this.isNoDictionaryDimensionColumn = 
parameters.getNoDictionaryDimnesionColumn();
-this.measureDataTypes = parameters.getMeasureDataType();
 this.readBufferSize = parameters.getBufferSize();
 this.compressorName = parameters.getSortTempCompressorName();
-
+this.tableFieldStat = new TableFieldStat(parameters);
+this.sortStepRowHandler = new SortStepRowHandler(tableFieldStat);
 this.executorService = Executors.newFixedThreadPool(1);
-this.nullSetWordsLength = ((parameters.getMeasureColCount() - 1) >> 6) + 1;
-comparator = new NewRowComparator(parameters.getNoDictionarySortColumn());
+comparator = new 
IntermediateSortTempRowComparator(parameters.getNoDictionarySortColumn());
 initialize();
   }
 
@@ -169,11 +156,17 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
*
* @throws CarbonSortKeyAndGroupByException problem while reading
*/
+  @Override
   public void readRow() throws CarbonSortKeyAndGroupByException {
 if (prefetch) {
   fillDataForPrefetch();
 } else {
-  this.returnRow = getRowFromStream();
+  try {
+this.returnRow = 
sortStepRowHandler.readIntermediateSortTempRowFromInputStream(stream);
+this.numberOfObjectRead++;
+  } catch (IOException e) {
+throw new CarbonSortKeyAndGroupByException("Problems while reading 
row", e);
+  }
 }
   }
 
@@ -207,63 +200,22 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
   }
 
   /**
-   * @retu