Build failed in Jenkins: carbondata-master-spark-2.1 #2056

2018-02-12 Thread Apache Jenkins Server
See 


--
Started by timer
[EnvInject] - Loading node environment variables.
Building remotely on H22 (ubuntu xenial) in workspace 

 > git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
 > git config remote.origin.url https://git-wip-us.apache.org/repos/asf/carbondata.git # timeout=10
Fetching upstream changes from https://git-wip-us.apache.org/repos/asf/carbondata.git
 > git --version # timeout=10
using GIT_SSH to set credentials 
 > git fetch --tags --progress https://git-wip-us.apache.org/repos/asf/carbondata.git +refs/heads/*:refs/remotes/origin/*
ERROR: Error fetching remote repo 'origin'
hudson.plugins.git.GitException: Failed to fetch from https://git-wip-us.apache.org/repos/asf/carbondata.git
    at hudson.plugins.git.GitSCM.fetchFrom(GitSCM.java:825)
    at hudson.plugins.git.GitSCM.retrieveChanges(GitSCM.java:1092)
    at hudson.plugins.git.GitSCM.checkout(GitSCM.java:1123)
    at hudson.scm.SCM.checkout(SCM.java:495)
    at hudson.model.AbstractProject.checkout(AbstractProject.java:1202)
    at hudson.model.AbstractBuild$AbstractBuildExecution.defaultCheckout(AbstractBuild.java:574)
    at jenkins.scm.SCMCheckoutStrategy.checkout(SCMCheckoutStrategy.java:86)
    at hudson.model.AbstractBuild$AbstractBuildExecution.run(AbstractBuild.java:499)
    at hudson.model.Run.execute(Run.java:1724)
    at hudson.maven.MavenModuleSetBuild.run(MavenModuleSetBuild.java:543)
    at hudson.model.ResourceController.execute(ResourceController.java:97)
    at hudson.model.Executor.run(Executor.java:421)
Caused by: hudson.plugins.git.GitException: Command "git fetch --tags --progress https://git-wip-us.apache.org/repos/asf/carbondata.git +refs/heads/*:refs/remotes/origin/*" returned status code 128:
stdout: 
stderr: remote: Counting objects: 2950, done.
remote: Compressing objects:   0% (1/761) ... remote: Compressing objects:  52% (396/761) ... remote: Compressing objects:  53% [repetitive per-percent progress output elided; the log breaks off here, mid-transfer]

carbondata git commit: Support generating assembling JAR for store-sdk module

2018-02-12 Thread jackylk
Repository: carbondata
Updated Branches:
  refs/heads/carbonstore fd450b151 -> 0b1521710


Support generating assembling JAR for store-sdk module

Support generating an assembly JAR for the store-sdk module and remove the
junit dependency

This closes #1976


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0b152171
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0b152171
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0b152171

Branch: refs/heads/carbonstore
Commit: 0b15217106a15da8d1607eb8b7b33a29b92595a9
Parents: fd450b1
Author: Jacky Li 
Authored: Tue Feb 13 09:12:09 2018 +0800
Committer: Jacky Li 
Committed: Tue Feb 13 15:20:26 2018 +0800

--
 common/pom.xml|  2 +
 core/pom.xml  |  2 +
 hadoop/pom.xml|  1 +
 integration/presto/pom.xml|  3 +-
 integration/spark-common-cluster-test/pom.xml |  2 +-
 integration/spark-common-test/pom.xml |  3 +-
 integration/spark-common/pom.xml  |  2 +-
 integration/spark2/pom.xml|  2 +-
 pom.xml   |  5 +++
 processing/pom.xml|  1 +
 store/sdk/pom.xml | 50 +-
 streaming/pom.xml |  1 -
 12 files changed, 66 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0b152171/common/pom.xml
--
diff --git a/common/pom.xml b/common/pom.xml
index 6343361..1b9723c 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -42,10 +42,12 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
+      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.jmockit</groupId>
       <artifactId>jmockit</artifactId>
+      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>

http://git-wip-us.apache.org/repos/asf/carbondata/blob/0b152171/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index f874615..f990a86 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -70,10 +70,12 @@
     <dependency>
       <groupId>org.jmockit</groupId>
       <artifactId>jmockit</artifactId>
+      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
+      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/carbondata/blob/0b152171/hadoop/pom.xml
--
diff --git a/hadoop/pom.xml b/hadoop/pom.xml
index 206246f..f4f17f2 100644
--- a/hadoop/pom.xml
+++ b/hadoop/pom.xml
@@ -42,6 +42,7 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
+      <scope>test</scope>
     </dependency>
   </dependencies>


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0b152171/integration/presto/pom.xml
--
diff --git a/integration/presto/pom.xml b/integration/presto/pom.xml
index 98bbe99..2d2c638 100644
--- a/integration/presto/pom.xml
+++ b/integration/presto/pom.xml
@@ -193,7 +193,7 @@
     <exclusion>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_2.11</artifactId>
+      <artifactId>scalatest_${scala.binary.version}</artifactId>
     </exclusion>
     <exclusion>
       <groupId>org.apache.zookeeper</groupId>
@@ -330,7 +330,6 @@
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <version>2.2.1</version>
       <scope>test</scope>
     </dependency>


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0b152171/integration/spark-common-cluster-test/pom.xml
--
diff --git a/integration/spark-common-cluster-test/pom.xml b/integration/spark-common-cluster-test/pom.xml
index 8af8b14..e2239be 100644
--- a/integration/spark-common-cluster-test/pom.xml
+++ b/integration/spark-common-cluster-test/pom.xml
@@ -49,11 +49,11 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
+      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <version>2.2.1</version>
       <scope>test</scope>
     </dependency>


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0b152171/integration/spark-common-test/pom.xml
--
diff --git a/integration/spark-common-test/pom.xml b/integration/spark-common-test/pom.xml
index e3c7bd8..53f37df 100644
--- a/integration/spark-common-test/pom.xml
+++ b/integration/spark-common-test/pom.xml
@@ -106,16 +106,17 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
+      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <version>2.2.1</version>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.jmockit</groupId>
       <artifactId>jmockit</artifactId>
+      <scope>test</scope>
     </dependency>
   </dependencies>


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0b152171/integration/spark-common/pom.xml
--
diff --git a/integration/spark-common/pom.xml b/integration/spark-common/pom.xml
index e80593b..dae37bb 100644
--- 

carbondata git commit: [CARBONDATA-2023][DataLoad] Add size base block allocation in data loading

2018-02-12 Thread jackylk
Repository: carbondata
Updated Branches:
  refs/heads/carbonstore 937bdb867 -> fd450b151


[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading

Carbondata assigns blocks to nodes at the beginning of data loading. The
previous block allocation strategy was based on block count, so it suffers
from data skew when the sizes of the input files differ a lot.

We introduce a size-based block allocation strategy to optimize data
loading performance in skewed-data scenarios.

This closes #1808
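
To illustrate the strategy (a minimal sketch with hypothetical names, not
the actual CarbonLoaderUtil implementation): sort blocks by size in
descending order, then greedily hand each block to the node with the
fewest bytes assigned so far.

import java.util.*;

public class SizeBasedAllocatorSketch {
  /** Greedy assignment: largest blocks first, each to the lightest node. */
  static Map<String, List<Long>> assign(List<Long> blockSizes, List<String> nodes) {
    List<Long> sorted = new ArrayList<>(blockSizes);
    sorted.sort(Comparator.reverseOrder());  // big blocks first
    Map<String, List<Long>> plan = new HashMap<>();
    Map<String, Long> assignedBytes = new HashMap<>();
    for (String n : nodes) {
      plan.put(n, new ArrayList<>());
      assignedBytes.put(n, 0L);
    }
    for (long size : sorted) {
      // pick the node with the smallest total so far
      String target = Collections.min(assignedBytes.entrySet(),
          Map.Entry.comparingByValue()).getKey();
      plan.get(target).add(size);
      assignedBytes.merge(target, size, Long::sum);
    }
    return plan;
  }

  public static void main(String[] args) {
    // skewed input: one huge file and several small ones
    System.out.println(assign(
        Arrays.asList(900L, 10L, 20L, 30L, 40L, 50L),
        Arrays.asList("node1", "node2")));
  }
}

With a block-count strategy each node would get three blocks regardless of
size; the sketch above balances total bytes per node instead.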


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fd450b15
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fd450b15
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fd450b15

Branch: refs/heads/carbonstore
Commit: fd450b151cb5858504116f560da5cd7a357894e5
Parents: 937bdb8
Author: xuchuanyin 
Authored: Thu Feb 8 14:42:39 2018 +0800
Committer: Jacky Li 
Committed: Mon Feb 12 19:22:46 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 +
 .../core/datastore/block/TableBlockInfo.java|  29 ++
 .../carbondata/core/util/CarbonProperties.java  |  11 +
 docs/useful-tips-on-carbondata.md   |   1 +
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 ++
 .../processing/util/CarbonLoaderUtil.java   | 480 ---
 .../processing/util/CarbonLoaderUtilTest.java   | 125 +
 10 files changed, 545 insertions(+), 175 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/fd450b15/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index bcfeba0..a6bf60f 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,4 +114,14 @@ public final class CarbonLoadOptionConstants {
    */
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
+  /**
+   * Enable block-size-based block allocation while loading data. By default, carbondata assigns
+   * blocks to nodes based on block count. If this option is set to `true`, carbondata will
+   * consider block size first and make sure that all the nodes will process almost equal sizes of
+   * data. This option is especially useful when you encounter skewed data.
+   */
+  @CarbonProperty
+  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
+      = "carbon.load.skewedDataOptimization.enabled";
+  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }
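
For reference, the new flag could presumably be switched on through the
usual CarbonProperties mechanism before triggering a load (a sketch,
assuming the standard addProperty API):

import org.apache.carbondata.core.util.CarbonProperties;

public class EnableSkewOptimizationSketch {
  public static void main(String[] args) {
    // enable size-based block allocation for skewed input files;
    // the key matches ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION above
    CarbonProperties.getInstance()
        .addProperty("carbon.load.skewedDataOptimization.enabled", "true");
  }
}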

http://git-wip-us.apache.org/repos/asf/carbondata/blob/fd450b15/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index 907708c..6624311 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,6 +19,8 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -92,6 +94,20 @@ public class TableBlockInfo implements Distributable, Serializable {
 
   private String dataMapWriterPath;
 
+  /**
+   * comparator to sort by block size in descending order.
+   * Since each line is not exactly the same, the size of an InputSplit may differ,
+   * so we allow some deviation for these splits.
+   */
+  public static final Comparator<Distributable> DATA_SIZE_DESC_COMPARATOR =
+      new Comparator<Distributable>() {
+        @Override public int compare(Distributable o1, Distributable o2) {
+          long diff =
+              ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) o2).getBlockLength();
+          return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
+        }
+      };
+
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
       String[] locations, long blockLength, ColumnarFormatVersion version,
   

[1/2] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-02-12 Thread jackylk
Repository: carbondata
Updated Branches:
  refs/heads/carbonstore 0d50f6546 -> 937bdb867


http://git-wip-us.apache.org/repos/asf/carbondata/blob/937bdb86/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
--
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
index 11b3d43..527452a 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
@@ -31,15 +31,14 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.core.util.DataTypeUtil;
-import org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage;
+import org.apache.carbondata.processing.loading.row.IntermediateSortTempRow;
+import org.apache.carbondata.processing.loading.sort.SortStepRowHandler;
 import org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException;
-import org.apache.carbondata.processing.sort.sortdata.NewRowComparator;
+import org.apache.carbondata.processing.sort.sortdata.IntermediateSortTempRowComparator;
 import org.apache.carbondata.processing.sort.sortdata.SortParameters;
+import org.apache.carbondata.processing.sort.sortdata.TableFieldStat;
 
 public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
 
@@ -63,21 +62,15 @@ public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
    * entry count
    */
   private int entryCount;
-
   /**
    * return row
    */
-  private Object[] returnRow;
-  private int dimCnt;
-  private int complexCnt;
-  private int measureCnt;
-  private boolean[] isNoDictionaryDimensionColumn;
-  private DataType[] measureDataTypes;
+  private IntermediateSortTempRow returnRow;
   private int readBufferSize;
   private String compressorName;
-  private Object[][] currentBuffer;
+  private IntermediateSortTempRow[] currentBuffer;
 
-  private Object[][] backupBuffer;
+  private IntermediateSortTempRow[] backupBuffer;
 
   private boolean isBackupFilled;
 
@@ -100,27 +93,21 @@ public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
 
   private int numberOfObjectRead;
 
-  private int nullSetWordsLength;
-
-  private Comparator<Object[]> comparator;
-
+  private TableFieldStat tableFieldStat;
+  private SortStepRowHandler sortStepRowHandler;
+  private Comparator<IntermediateSortTempRow> comparator;
   /**
    * Constructor to initialize
    */
   public UnsafeSortTempFileChunkHolder(File tempFile, SortParameters parameters) {
     // set temp file
     this.tempFile = tempFile;
-    this.dimCnt = parameters.getDimColCount();
-    this.complexCnt = parameters.getComplexDimColCount();
-    this.measureCnt = parameters.getMeasureColCount();
-    this.isNoDictionaryDimensionColumn = parameters.getNoDictionaryDimnesionColumn();
-    this.measureDataTypes = parameters.getMeasureDataType();
     this.readBufferSize = parameters.getBufferSize();
     this.compressorName = parameters.getSortTempCompressorName();
-
+    this.tableFieldStat = new TableFieldStat(parameters);
+    this.sortStepRowHandler = new SortStepRowHandler(tableFieldStat);
     this.executorService = Executors.newFixedThreadPool(1);
-    this.nullSetWordsLength = ((parameters.getMeasureColCount() - 1) >> 6) + 1;
-    comparator = new NewRowComparator(parameters.getNoDictionarySortColumn());
+    comparator = new IntermediateSortTempRowComparator(parameters.getNoDictionarySortColumn());
     initialize();
   }
 
@@ -169,11 +156,17 @@ public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
    *
    * @throws CarbonSortKeyAndGroupByException problem while reading
    */
+  @Override
   public void readRow() throws CarbonSortKeyAndGroupByException {
     if (prefetch) {
       fillDataForPrefetch();
     } else {
-      this.returnRow = getRowFromStream();
+      try {
+        this.returnRow = sortStepRowHandler.readIntermediateSortTempRowFromInputStream(stream);
+        this.numberOfObjectRead++;
+      } catch (IOException e) {
+        throw new CarbonSortKeyAndGroupByException("Problems while reading row", e);
+      }
     }
   }
 
@@ -207,63 +200,22 @@

[2/2] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-02-12 Thread jackylk
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

Pick up the no-sort fields in the row, pack them as a byte array, and skip
parsing them during merge sort to reduce CPU consumption

This closes #1792
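
A minimal sketch of the packing idea (hypothetical names, not the actual
SortStepRowHandler): serialize the fields that do not participate in the
sort into one opaque byte[] when a sort temp row is written, so the merge
phase compares only the sort columns and copies the rest through
untouched.

import java.io.*;

public class NoSortFieldPackingSketch {
  /** Row split into sort keys (compared during merge) and packed no-sort bytes. */
  static class TempRow {
    final int[] sortKeys;     // parsed, used by the merge comparator
    final byte[] noSortBlob;  // written/read as-is, never parsed while merging
    TempRow(int[] sortKeys, byte[] noSortBlob) {
      this.sortKeys = sortKeys;
      this.noSortBlob = noSortBlob;
    }
  }

  static void write(TempRow row, DataOutputStream out) throws IOException {
    for (int k : row.sortKeys) out.writeInt(k);
    out.writeInt(row.noSortBlob.length);
    out.write(row.noSortBlob);  // opaque copy, no per-field work
  }

  static TempRow read(DataInputStream in, int sortKeyCount) throws IOException {
    int[] keys = new int[sortKeyCount];
    for (int i = 0; i < sortKeyCount; i++) keys[i] = in.readInt();
    byte[] blob = new byte[in.readInt()];
    in.readFully(blob);  // unpacked into real fields only after the final merge
    return new TempRow(keys, blob);
  }
}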


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/937bdb86
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/937bdb86
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/937bdb86

Branch: refs/heads/carbonstore
Commit: 937bdb867aeac5159f51de3582c4556c949bfb5c
Parents: 0d50f65
Author: xuchuanyin 
Authored: Thu Feb 8 14:35:14 2018 +0800
Committer: Jacky Li 
Committed: Mon Feb 12 16:13:21 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 +--
 .../presto/util/CarbonDataStoreCreator.scala|   1 -
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 +
 .../loading/sort/SortStepRowHandler.java| 466 +++
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++---
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 +--
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 ---
 .../UnsafeRowComparatorForNormalDims.java   |  59 +++
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 ++
 .../merger/UnsafeIntermediateFileMerger.java| 118 +
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 -
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +---
 .../IntermediateSortTempRowComparator.java  |  73 +++
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 ---
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +---
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 ++-
 .../sort/sortdata/TableFieldStat.java   | 176 +++
 28 files changed, 1186 insertions(+), 1294 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/937bdb86/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index d6ecfbc..fca1244 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,18 +82,26 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required Dimension from obj []
+   * Method to get the required dictionary Dimension from obj []
    *
    * @param index
    * @param row
    * @return
    */
-  public static Integer getDimension(int index, Object[] row) {
-
-    Integer[] dimensions = (Integer[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
-
+  public static int getDictDimension(int index, Object[] row) {
+    int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
     return dimensions[index];
+  }
 
+  /**
+   * Method to get the required non-dictionary & complex column from the 3-parted row
+   * @param index
+   * @param row
+   * @return
+   */
+  public static byte[] getNoDictOrComplex(int index, Object[] row) {
+    byte[][] nonDictArray = (byte[][]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+    return nonDictArray[index];
   }
 
   /**
@@ -108,60 +116,11 @@
     return measures[index];
   }
 
-  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
-
-    return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-  }
-
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] byteBufferArr,
       Object[] measureArray) {
-
     out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
     out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
     out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
-
-  /**
-   * This method will extract the single dimension from the complete high card dims byte[].
-   * The format of the byte[] will be: TotalLength, CompleteStartOffsets, Data
-   *
-   * @param highCardArr
-   * @param index
-   * @param highCardinalityCount
-   *

[3/3] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-02-12 Thread jackylk
[CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

To make an assembly JAR of the store-sdk module, it should not depend on
the carbon-spark module

This closes #1970
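
With the load-model code moved out of carbon-spark, a Spark-free client
could presumably build a load model roughly like this (a sketch based on
the CarbonLoadModelBuilder added later in this digest; the option keys and
surrounding setup are assumptions):

import java.util.HashMap;
import java.util.Map;

import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
import org.apache.carbondata.processing.loading.model.CarbonLoadModelBuilder;

public class SdkLoadModelSketch {
  static CarbonLoadModel buildModel(CarbonTable table) throws Exception {
    Map<String, String> options = new HashMap<>();
    options.put("delimiter", ",");  // load options from user input
    return new CarbonLoadModelBuilder(table).build(options);
  }
}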


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0d50f654
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0d50f654
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0d50f654

Branch: refs/heads/carbonstore
Commit: 0d50f65461ae3855db66f44fa06e01174de50ccd
Parents: a848ccf
Author: Jacky Li 
Authored: Sun Feb 11 21:37:04 2018 +0800
Committer: Jacky Li 
Committed: Mon Feb 12 16:06:49 2018 +0800

--
 .../java/org/apache/carbondata/common/Maps.java |  39 ++
 .../org/apache/carbondata/common/Strings.java   |   3 +
 .../ConcurrentOperationException.java   |  56 +++
 .../exceptions/TableStatusLockException.java|  34 ++
 .../sql/InvalidLoadOptionException.java |  33 ++
 .../sql/MalformedCarbonCommandException.java|  75 +++
 .../sql/MalformedDataMapCommandException.java   |  37 ++
 .../exceptions/sql/NoSuchDataMapException.java  |  39 ++
 .../statusmanager/SegmentStatusManager.java | 124 +
 .../carbondata/core/util/DeleteLoadFolders.java | 200 
 .../preaggregate/TestPreAggCreateCommand.scala  |   2 +-
 .../preaggregate/TestPreAggregateDrop.scala |   2 +-
 .../timeseries/TestTimeSeriesCreateTable.scala  |   2 +-
 .../timeseries/TestTimeSeriesDropSuite.scala|   2 +-
 .../TestTimeseriesTableSelection.scala  |   2 +-
 .../TestDataLoadWithColumnsMoreThanSchema.scala |   3 +-
 .../dataload/TestGlobalSortDataLoad.scala   |   2 +-
 .../TestLoadDataWithDiffTimestampFormat.scala   |   2 +-
 .../TestLoadDataWithFileHeaderException.scala   |  11 +-
 ...ataWithMalformedCarbonCommandException.scala |   3 +-
 .../testsuite/dataload/TestLoadOptions.scala|   2 +-
 .../dataload/TestTableLevelBlockSize.scala  |   4 +-
 .../testsuite/datamap/TestDataMapCommand.scala  |   2 +-
 .../dataretention/DataRetentionTestCase.scala   |   2 +-
 .../spark/testsuite/datetype/DateTypeTest.scala |   2 +-
 .../testsuite/sortcolumns/TestSortColumns.scala |   3 +-
 integration/spark-common/pom.xml|   5 -
 .../exception/ConcurrentOperationException.java |  44 --
 .../MalformedCarbonCommandException.java|  69 ---
 .../MalformedDataMapCommandException.java   |  32 --
 .../spark/exception/NoSuchDataMapException.java |  33 --
 .../org/apache/carbondata/api/CarbonStore.scala |   6 +-
 .../spark/CarbonColumnValidator.scala   |   8 +-
 .../carbondata/spark/load/ValidateUtil.scala|  71 ---
 .../carbondata/spark/rdd/CarbonMergerRDD.scala  |   6 +-
 .../carbondata/spark/util/CommonUtil.scala  |  56 +--
 .../carbondata/spark/util/DataLoadingUtil.scala | 451 ---
 .../spark/util/GlobalDictionaryUtil.scala   |   2 +-
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|   4 +-
 .../spark/rdd/CarbonTableCompactor.scala|   2 +-
 .../org/apache/spark/sql/CarbonSource.scala |   2 +-
 .../datamap/CarbonCreateDataMapCommand.scala|   2 +-
 .../datamap/CarbonDropDataMapCommand.scala  |   2 +-
 .../CarbonAlterTableCompactionCommand.scala |  11 +-
 .../management/CarbonLoadDataCommand.scala  |  18 +-
 .../CarbonProjectForDeleteCommand.scala |   2 +-
 .../CarbonProjectForUpdateCommand.scala |   2 +-
 .../command/mutation/IUDCommonUtil.scala|   2 +-
 .../CreatePreAggregateTableCommand.scala|   3 +-
 .../preaaggregate/PreAggregateUtil.scala|   2 +-
 .../schema/CarbonAlterTableRenameCommand.scala  |   2 +-
 .../command/timeseries/TimeSeriesUtil.scala |   2 +-
 .../datasources/CarbonFileFormat.scala  |  15 +-
 .../sql/execution/strategy/DDLStrategy.scala|   2 +-
 .../strategy/StreamingTableStrategy.scala   |   2 +-
 .../execution/command/CarbonHiveCommands.scala  |   2 +-
 .../sql/parser/CarbonSpark2SqlParser.scala  |   2 +-
 .../spark/sql/parser/CarbonSparkSqlParser.scala |   2 +-
 .../org/apache/spark/util/AlterTableUtil.scala  |   2 +-
 .../org/apache/spark/util/TableAPIUtil.scala|   2 +-
 .../spark/sql/hive/CarbonSessionState.scala |   2 +-
 .../segmentreading/TestSegmentReading.scala |   2 +-
 .../spark/util/AllDictionaryTestCase.scala  |   4 +-
 .../util/ExternalColumnDictionaryTestCase.scala |   6 +-
 .../TestStreamingTableOperation.scala   |   2 +-
 .../bucketing/TableBucketingTestCase.scala  |   2 +-
 .../vectorreader/AddColumnTestCases.scala   |   2 +-
 .../loading/model/CarbonLoadModel.java  |  13 +-
 .../loading/model/CarbonLoadModelBuilder.java   | 322 +
 .../processing/loading/model/LoadOption.java| 251 +++
 

[2/3] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-02-12 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0d50f654/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
--
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
deleted file mode 100644
index 9c5ab69..000
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.spark.util
-
-import scala.collection.{immutable, mutable}
-import scala.collection.JavaConverters._
-
-import org.apache.commons.lang3.StringUtils
-import org.apache.hadoop.conf.Configuration
-import org.apache.spark.sql.util.CarbonException
-
-import org.apache.carbondata.common.constants.LoggerAction
-import org.apache.carbondata.common.logging.{LogService, LogServiceFactory}
-import org.apache.carbondata.core.constants.{CarbonCommonConstants, CarbonLoadOptionConstants}
-import org.apache.carbondata.core.locks.{CarbonLockFactory, CarbonLockUtil, LockUsage}
-import org.apache.carbondata.core.metadata.schema.table.CarbonTable
-import org.apache.carbondata.core.statusmanager.{SegmentStatus, SegmentStatusManager}
-import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
-import org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants
-import org.apache.carbondata.processing.loading.model.{CarbonDataLoadSchema, CarbonLoadModel}
-import org.apache.carbondata.processing.util.{CarbonLoaderUtil, DeleteLoadFolders, TableOptionConstant}
-import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
-import org.apache.carbondata.spark.load.ValidateUtil
-
-/**
- * the util object of data loading
- */
-object DataLoadingUtil {
-
-  val LOGGER: LogService = LogServiceFactory.getLogService(this.getClass.getCanonicalName)
-
-  /**
-   * get data loading options and initialise default value
-   */
-  def getDataLoadingOptions(
-      carbonProperty: CarbonProperties,
-      options: immutable.Map[String, String]): mutable.Map[String, String] = {
-    val optionsFinal = scala.collection.mutable.Map[String, String]()
-    optionsFinal.put("delimiter", options.getOrElse("delimiter", ","))
-    optionsFinal.put("quotechar", options.getOrElse("quotechar", "\""))
-    optionsFinal.put("fileheader", options.getOrElse("fileheader", ""))
-    optionsFinal.put("commentchar", options.getOrElse("commentchar", "#"))
-    optionsFinal.put("columndict", options.getOrElse("columndict", null))
-
-    optionsFinal.put("escapechar",
-      CarbonLoaderUtil.getEscapeChar(options.getOrElse("escapechar", "\\")))
-
-    optionsFinal.put(
-      "serialization_null_format",
-      options.getOrElse("serialization_null_format", "\\N"))
-
-    optionsFinal.put(
-      "bad_records_logger_enable",
-      options.getOrElse(
-        "bad_records_logger_enable",
-        carbonProperty.getProperty(
-          CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE,
-          CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE_DEFAULT)))
-
-    val badRecordActionValue = carbonProperty.getProperty(
-      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
-      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT)
-
-    optionsFinal.put(
-      "bad_records_action",
-      options.getOrElse(
-        "bad_records_action",
-        carbonProperty.getProperty(
-          CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_ACTION,
-          badRecordActionValue)))
-
-    optionsFinal.put(
-      "is_empty_data_bad_record",
-      options.getOrElse(
-        "is_empty_data_bad_record",
-        carbonProperty.getProperty(
-          CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD,
-          CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD_DEFAULT)))
-
-    optionsFinal.put(
-      "skip_empty_line",
-      options.getOrElse(
-        "skip_empty_line",
-        carbonProperty.getProperty(

[1/3] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-02-12 Thread jackylk
Repository: carbondata
Updated Branches:
  refs/heads/carbonstore a848ccff8 -> 0d50f6546


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0d50f654/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
--
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
new file mode 100644
index 000..fbb93b6
--- /dev/null
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.processing.loading.model;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.carbondata.common.Maps;
+import org.apache.carbondata.common.Strings;
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import org.apache.carbondata.common.constants.LoggerAction;
+import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonProperties;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants;
+import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat;
+import org.apache.carbondata.processing.loading.sort.SortScopeOptions;
+import org.apache.carbondata.processing.util.TableOptionConstant;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Builder for {@link CarbonLoadModel}
+ */
+@InterfaceAudience.Developer
+public class CarbonLoadModelBuilder {
+
+  private CarbonTable table;
+
+  public CarbonLoadModelBuilder(CarbonTable table) {
+this.table = table;
+  }
+
+  /**
+   * build CarbonLoadModel for data loading
+   * @param options Load options from user input
+   * @return a new CarbonLoadModel instance
+   */
+  public CarbonLoadModel build(
+      Map<String, String> options) throws InvalidLoadOptionException, IOException {
+    Map<String, String> optionsFinal = LoadOption.fillOptionWithDefaultValue(options);
+    optionsFinal.put("sort_scope", "no_sort");
+    if (!options.containsKey("fileheader")) {
+      List<CarbonColumn> csvHeader = table.getCreateOrderColumn(table.getTableName());
+      String[] columns = new String[csvHeader.size()];
+      for (int i = 0; i < columns.length; i++) {
+        columns[i] = csvHeader.get(i).getColName();
+      }
+      optionsFinal.put("fileheader", Strings.mkString(columns, ","));
+    }
+    CarbonLoadModel model = new CarbonLoadModel();
+
+    // we have provided 'fileheader', so hadoopConf can be null
+    build(options, optionsFinal, model, null);
+
+    // set default values
+    model.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
+    model.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
+    model.setUseOnePass(Boolean.parseBoolean(Maps.getOrDefault(options, "onepass", "false")));
+    model.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost", null));
+    try {
+      model.setDictionaryServerPort(
+          Integer.parseInt(Maps.getOrDefault(options, "dictport", "-1")));
+    } catch (NumberFormatException e) {
+      throw new InvalidLoadOptionException(e.getMessage());
+    }
+    return model;
+  }
+
+  /**
+   * build CarbonLoadModel for data loading
+   * @param options Load options from user input
+   * @param optionsFinal Load options populated with default values for optional options
+   * @param carbonLoadModel The output load model
+   * @param hadoopConf hadoopConf is needed to read the CSV header if 'fileheader' is not set in
+   *                   user provided load options
+   */
+  public void build(
+      Map