carbondata git commit: [CARBONDATA-2172][Lucene] Add text_columns property for Lucene DataMap

2018-03-04 Thread jackylk
Repository: carbondata
Updated Branches:
  refs/heads/datamap 69eb26e68 -> 9390abf6a


[CARBONDATA-2172][Lucene] Add text_columns property for Lucene DataMap

Add text_columns property for Lucene DataMap

This closes #2019


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9390abf6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9390abf6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9390abf6

Branch: refs/heads/datamap
Commit: 9390abf6a0a79261b9de981502df9017aab68f96
Parents: 69eb26e
Author: QiangCai 
Authored: Thu Mar 1 15:40:01 2018 +0800
Committer: Jacky Li 
Committed: Mon Mar 5 10:52:51 2018 +0800

--
 .../core/datamap/DataMapStoreManager.java   |  3 +-
 .../core/datamap/dev/DataMapFactory.java|  4 +-
 datamap/lucene/pom.xml  |  5 ++
 .../lucene/LuceneDataMapFactoryBase.java| 89 ++--
 .../lucene/LuceneFineGrainDataMapSuite.scala| 58 -
 5 files changed, 126 insertions(+), 33 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/9390abf6/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
index 0223ae2..e7c72e8 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
@@ -163,7 +163,8 @@ public final class DataMapStoreManager {
   }
 
   public TableDataMap registerDataMap(AbsoluteTableIdentifier identifier,
-  DataMapSchema dataMapSchema,  DataMapFactory dataMapFactory) throws 
IOException {
+  DataMapSchema dataMapSchema,  DataMapFactory dataMapFactory)
+  throws IOException, MalformedDataMapCommandException {
 String table = identifier.getCarbonTableIdentifier().getTableUniqueName();
 // Just update the segmentRefreshMap with the table if not added.
 getTableSegmentRefresher(identifier);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/9390abf6/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
index 7bf04c9..ef9bb66 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
@@ -19,6 +19,7 @@ package org.apache.carbondata.core.datamap.dev;
 import java.io.IOException;
 import java.util.List;
 
+import 
org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException;
 import org.apache.carbondata.core.datamap.DataMapDistributable;
 import org.apache.carbondata.core.datamap.DataMapLevel;
 import org.apache.carbondata.core.datamap.DataMapMeta;
@@ -34,7 +35,8 @@ public interface DataMapFactory {
   /**
* Initialization of Datamap factory with the identifier and datamap name
*/
-  void init(AbsoluteTableIdentifier identifier, DataMapSchema dataMapSchema) 
throws IOException;
+  void init(AbsoluteTableIdentifier identifier, DataMapSchema dataMapSchema)
+  throws IOException, MalformedDataMapCommandException;
 
   /**
* Return a new write for this datamap

http://git-wip-us.apache.org/repos/asf/carbondata/blob/9390abf6/datamap/lucene/pom.xml
--
diff --git a/datamap/lucene/pom.xml b/datamap/lucene/pom.xml
index ee504c6..4019065 100644
--- a/datamap/lucene/pom.xml
+++ b/datamap/lucene/pom.xml
@@ -26,6 +26,11 @@
   ${project.version}
 
 
+  org.apache.commons
+  commons-lang3
+  3.3.2
+
+
   org.apache.lucene
   lucene-core
   ${lucene.version}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/9390abf6/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java
--
diff --git 
a/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java
 
b/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java
index 5eb7054..3a1adab 100644
--- 
a/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java
+++ 

svn commit: r25397 - in /dev/carbondata/1.3.1-rc1: ./ apache-carbondata-1.3.1-source-release.zip apache-carbondata-1.3.1-source-release.zip.asc apache-carbondata-1.3.1-source-release.zip.md5 apache-ca

2018-03-04 Thread ravipesala
Author: ravipesala
Date: Sun Mar  4 16:51:34 2018
New Revision: 25397

Log:
Upload 1.3.1-rc1

Added:
dev/carbondata/1.3.1-rc1/
dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip   (with 
props)
dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.asc
dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.md5
dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.sha512

Added: dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip
==
Binary file - no diff available.

Propchange: dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip
--
svn:mime-type = application/octet-stream

Added: dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.asc
==
--- dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.asc 
(added)
+++ dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.asc Sun 
Mar  4 16:51:34 2018
@@ -0,0 +1,17 @@
+-BEGIN PGP SIGNATURE-
+Version: GnuPG v1
+
+iQIcBAABAgAGBQJam979AAoJELrXKninsbLutJ0P/3JDIzA9r8KpksjWUBORD7QP
+VyJw2oTP4ibqQdKwDB+P5UxQOx9OPiXpAvIE6VazOxguvZ3BzjHdpA/n/qH6gCj7
+gS/V4RelNx+bTXZNN1Q2KjIFTYKwOZ1lBjDHOJsOPjc4YbLsuiunh4CmYgaatiIN
+K3hXwRI8VpKojrSttbFY4We6KrsWuX9mfzbDuj1LBlybWbPh5AX3OQMbz9WiYAb/
+HOFQmYEJpVFJhmKWwF8qx3H2uo/dIuJyKsemE+OupJsozvHU9zYw+SJP+Xdn450W
+8Q5nepe5+i+VDPUbqezesAzbZkK3wLoUJ8eunHwM6ey0sOASJBK1ZRcEnCmASI5B
+CHwndg28yOe+52dYMCl7EKXTA31rh2Xu0L9Avzb4WrtZs2szEBiDi5yN6PXpzFVR
+oEs/av6hhu1+ihYeyq0h8XD/3UOU4ftCqIh40jiZJNdqOYTCsB7M/YDIxRPRrCiI
+VGOVAvEjPybSMFEmR4AfDnYdNTRqF/DL04qydmFxJSnw7URc44eE39Hh9xlMZ5n9
+fCXQyW9gd+CQ81sPRd6sRubQU1dCucY3biZm8yzaHAIbFGcBePWubfIdJoeu1sLM
+SGUC5c0TPViVU8IQ68gURIutzvtkjGYqQlGnS7CrlFljyjpdHJB5ofoin1Zvpc3r
+Hiu/mR2Z3hfkevyKCW36
+=mUae
+-END PGP SIGNATURE-

Added: dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.md5
==
--- dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.md5 
(added)
+++ dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.md5 Sun 
Mar  4 16:51:34 2018
@@ -0,0 +1 @@
+fa97a8aaf55d7c54197ebf3d942ceadc  apache-carbondata-1.3.1-source-release.zip

Added: 
dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.sha512
==
--- dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.sha512 
(added)
+++ dev/carbondata/1.3.1-rc1/apache-carbondata-1.3.1-source-release.zip.sha512 
Sun Mar  4 16:51:34 2018
@@ -0,0 +1 @@
+7f3d94cf4a7f1bb56195e4ca73eb0529e401b5fc8e8bb8a0954305da830e4bcbf7501282f9fa713b9478c1029921d4ca3ad8f9ff3813472bf0d631af8373e984
  apache-carbondata-1.3.1-source-release.zip




[26/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/586ab702/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
index 11b3d43..527452a 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
@@ -31,15 +31,14 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.core.util.DataTypeUtil;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage;
+import org.apache.carbondata.processing.loading.row.IntermediateSortTempRow;
+import org.apache.carbondata.processing.loading.sort.SortStepRowHandler;
 import 
org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException;
-import org.apache.carbondata.processing.sort.sortdata.NewRowComparator;
+import 
org.apache.carbondata.processing.sort.sortdata.IntermediateSortTempRowComparator;
 import org.apache.carbondata.processing.sort.sortdata.SortParameters;
+import org.apache.carbondata.processing.sort.sortdata.TableFieldStat;
 
 public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
 
@@ -63,21 +62,15 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
* entry count
*/
   private int entryCount;
-
   /**
* return row
*/
-  private Object[] returnRow;
-  private int dimCnt;
-  private int complexCnt;
-  private int measureCnt;
-  private boolean[] isNoDictionaryDimensionColumn;
-  private DataType[] measureDataTypes;
+  private IntermediateSortTempRow returnRow;
   private int readBufferSize;
   private String compressorName;
-  private Object[][] currentBuffer;
+  private IntermediateSortTempRow[] currentBuffer;
 
-  private Object[][] backupBuffer;
+  private IntermediateSortTempRow[] backupBuffer;
 
   private boolean isBackupFilled;
 
@@ -100,27 +93,21 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
 
   private int numberOfObjectRead;
 
-  private int nullSetWordsLength;
-
-  private Comparator comparator;
-
+  private TableFieldStat tableFieldStat;
+  private SortStepRowHandler sortStepRowHandler;
+  private Comparator comparator;
   /**
* Constructor to initialize
*/
   public UnsafeSortTempFileChunkHolder(File tempFile, SortParameters 
parameters) {
 // set temp file
 this.tempFile = tempFile;
-this.dimCnt = parameters.getDimColCount();
-this.complexCnt = parameters.getComplexDimColCount();
-this.measureCnt = parameters.getMeasureColCount();
-this.isNoDictionaryDimensionColumn = 
parameters.getNoDictionaryDimnesionColumn();
-this.measureDataTypes = parameters.getMeasureDataType();
 this.readBufferSize = parameters.getBufferSize();
 this.compressorName = parameters.getSortTempCompressorName();
-
+this.tableFieldStat = new TableFieldStat(parameters);
+this.sortStepRowHandler = new SortStepRowHandler(tableFieldStat);
 this.executorService = Executors.newFixedThreadPool(1);
-this.nullSetWordsLength = ((parameters.getMeasureColCount() - 1) >> 6) + 1;
-comparator = new NewRowComparator(parameters.getNoDictionarySortColumn());
+comparator = new 
IntermediateSortTempRowComparator(parameters.getNoDictionarySortColumn());
 initialize();
   }
 
@@ -169,11 +156,17 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
*
* @throws CarbonSortKeyAndGroupByException problem while reading
*/
+  @Override
   public void readRow() throws CarbonSortKeyAndGroupByException {
 if (prefetch) {
   fillDataForPrefetch();
 } else {
-  this.returnRow = getRowFromStream();
+  try {
+this.returnRow = 
sortStepRowHandler.readIntermediateSortTempRowFromInputStream(stream);
+this.numberOfObjectRead++;
+  } catch (IOException e) {
+throw new CarbonSortKeyAndGroupByException("Problems while reading 
row", e);
+  }
 }
   }
 
@@ -207,63 +200,22 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
   }
 
   /**

[28/50] [abbrv] carbondata git commit: [REBASE] resolve conflict after rebasing to master

2018-03-04 Thread jackylk
[REBASE] resolve conflict after rebasing to master


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6216294c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6216294c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6216294c

Branch: refs/heads/carbonstore
Commit: 6216294c1e28c1db05e572f0aac3a991d345e085
Parents: 3fdd5d0
Author: Jacky Li 
Authored: Tue Feb 27 08:51:25 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:12 2018 +0800

--
 .../core/datamap/dev/AbstractDataMapWriter.java |  5 ++--
 .../core/datamap/dev/DataMapFactory.java|  2 +-
 .../blockletindex/BlockletDataMapFactory.java   |  2 +-
 .../SegmentUpdateStatusManager.java |  9 +-
 .../datamap/examples/MinMaxDataMapFactory.java  |  5 ++--
 .../datamap/examples/MinMaxDataWriter.java  |  7 +++--
 .../testsuite/datamap/CGDataMapTestCase.scala   | 26 
 .../testsuite/datamap/DataMapWriterSuite.scala  | 19 ++--
 .../testsuite/datamap/FGDataMapTestCase.scala   | 31 +---
 .../iud/DeleteCarbonTableTestCase.scala |  2 +-
 .../TestInsertAndOtherCommandConcurrent.scala   | 14 +
 .../StandardPartitionTableCleanTestCase.scala   | 12 
 .../carbondata/spark/util/DataLoadingUtil.scala |  2 +-
 .../datamap/DataMapWriterListener.java  |  2 +-
 .../processing/merger/CarbonDataMergerUtil.java |  8 +
 .../merger/CompactionResultSortProcessor.java   |  4 +--
 .../merger/RowResultMergerProcessor.java|  5 ++--
 .../partition/spliter/RowResultProcessor.java   |  5 ++--
 .../util/CarbonDataProcessorUtil.java   |  4 +--
 .../processing/util/CarbonLoaderUtil.java   |  9 --
 20 files changed, 73 insertions(+), 100 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/6216294c/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
index bcc9bad..de6dcb1 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
@@ -18,6 +18,7 @@ package org.apache.carbondata.core.datamap.dev;
 
 import java.io.IOException;
 
+import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
@@ -35,10 +36,10 @@ public abstract class AbstractDataMapWriter {
 
   protected String writeDirectoryPath;
 
-  public AbstractDataMapWriter(AbsoluteTableIdentifier identifier, String 
segmentId,
+  public AbstractDataMapWriter(AbsoluteTableIdentifier identifier, Segment 
segment,
   String writeDirectoryPath) {
 this.identifier = identifier;
-this.segmentId = segmentId;
+this.segmentId = segment.getSegmentNo();
 this.writeDirectoryPath = writeDirectoryPath;
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6216294c/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
index df5670d..50ac279 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
@@ -39,7 +39,7 @@ public interface DataMapFactory {
   /**
* Return a new write for this datamap
*/
-  AbstractDataMapWriter createWriter(Segment segment);
+  AbstractDataMapWriter createWriter(Segment segment, String 
writeDirectoryPath);
 
   /**
* Get the datamap for segmentid

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6216294c/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
index efe2b71..ee849bd 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
+++ 

[50/50] [abbrv] carbondata git commit: Support generating assembling JAR for store-sdk module

2018-03-04 Thread jackylk
Support generating assembling JAR for store-sdk module

Support generating assembling JAR for store-sdk module and remove junit 
dependency

This closes #1976


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/503e0d96
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/503e0d96
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/503e0d96

Branch: refs/heads/carbonstore
Commit: 503e0d96864173ccfb29e49686f0af3f7edd779f
Parents: 8fe8ab4
Author: Jacky Li 
Authored: Tue Feb 13 09:12:09 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:14 2018 +0800

--
 common/pom.xml|  2 +
 core/pom.xml  |  2 +
 hadoop/pom.xml|  1 +
 integration/presto/pom.xml|  3 +-
 integration/spark-common-cluster-test/pom.xml |  2 +-
 integration/spark-common-test/pom.xml |  3 +-
 integration/spark-common/pom.xml  |  2 +-
 integration/spark2/pom.xml|  2 +-
 pom.xml   |  5 +++
 processing/pom.xml|  1 +
 store/sdk/pom.xml | 50 +-
 streaming/pom.xml |  1 -
 12 files changed, 66 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/common/pom.xml
--
diff --git a/common/pom.xml b/common/pom.xml
index 5550129..433d575 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -42,10 +42,12 @@
 
   junit
   junit
+  test
 
 
   org.jmockit
   jmockit
+  test
 
 
   org.apache.hadoop

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 92c9607..824de0d 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -70,10 +70,12 @@
 
   org.jmockit
   jmockit
+  test
 
 
   junit
   junit
+  test
 
 
   org.apache.spark

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/hadoop/pom.xml
--
diff --git a/hadoop/pom.xml b/hadoop/pom.xml
index 2aaac99..c3964c5 100644
--- a/hadoop/pom.xml
+++ b/hadoop/pom.xml
@@ -42,6 +42,7 @@
 
   junit
   junit
+  test
 
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/integration/presto/pom.xml
--
diff --git a/integration/presto/pom.xml b/integration/presto/pom.xml
index aaaf175..0abcf38 100644
--- a/integration/presto/pom.xml
+++ b/integration/presto/pom.xml
@@ -193,7 +193,7 @@
 
 
   org.scalatest
-  scalatest_2.11
+  scalatest_${scala.binary.version}
 
 
   org.apache.zookeeper
@@ -330,7 +330,6 @@
 
   org.scalatest
   scalatest_${scala.binary.version}
-  2.2.1
   test
 
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/integration/spark-common-cluster-test/pom.xml
--
diff --git a/integration/spark-common-cluster-test/pom.xml 
b/integration/spark-common-cluster-test/pom.xml
index fd907a3..028da11 100644
--- a/integration/spark-common-cluster-test/pom.xml
+++ b/integration/spark-common-cluster-test/pom.xml
@@ -49,11 +49,11 @@
 
   junit
   junit
+  test
 
 
   org.scalatest
   scalatest_${scala.binary.version}
-  2.2.1
   test
 
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/integration/spark-common-test/pom.xml
--
diff --git a/integration/spark-common-test/pom.xml 
b/integration/spark-common-test/pom.xml
index 67a2317..d1c04ae 100644
--- a/integration/spark-common-test/pom.xml
+++ b/integration/spark-common-test/pom.xml
@@ -106,16 +106,17 @@
 
   junit
   junit
+  test
 
 
   org.scalatest
   scalatest_${scala.binary.version}
-  2.2.1
   test
 
 
   org.jmockit
   jmockit
+  test
 
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/integration/spark-common/pom.xml
--
diff --git a/integration/spark-common/pom.xml b/integration/spark-common/pom.xml
index 295d62b..16f327d 100644
--- a/integration/spark-common/pom.xml
+++ b/integration/spark-common/pom.xml
@@ -58,11 +58,11 @@
 
 

[46/50] [abbrv] carbondata git commit: [CARBONDATA-2091][DataLoad] Support specifying sort column bounds in data loading

2018-03-04 Thread jackylk
[CARBONDATA-2091][DataLoad] Support specifying sort column bounds in data 
loading

Enhance data loading performance by specifying sort column bounds
1. Add row range number during convert-process-step
2. Dispatch rows to each sorter by range number
3. Sort/Write process step can be done concurrently in each range
4. Since all sorttemp files will be written in one folder, we add the range
number to the file name to distinguish them

Tests added and docs updated

This closes #1953


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/faad967d
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/faad967d
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/faad967d

Branch: refs/heads/carbonstore
Commit: faad967d8d83eabd3e758b081370235e42a3ecee
Parents: 623a1f9
Author: xuchuanyin 
Authored: Tue Feb 13 10:58:06 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:14 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 +
 .../core/datastore/row/CarbonRow.java   |  10 +-
 .../ThriftWrapperSchemaConverterImpl.java   |   2 +-
 .../core/metadata/schema/BucketingInfo.java |  24 +-
 .../core/metadata/schema/ColumnRangeInfo.java   |  29 ++
 .../metadata/schema/SortColumnRangeInfo.java|  83 +
 docs/data-management-on-carbondata.md   |  11 +
 .../TestLoadDataWithSortColumnBounds.scala  | 348 +++
 .../carbondata/spark/rdd/CarbonScanRDD.scala|   2 +-
 .../carbondata/spark/rdd/PartitionDropper.scala |   2 +-
 .../spark/rdd/PartitionSplitter.scala   |   2 +-
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |   3 +-
 .../strategy/CarbonLateDecodeStrategy.scala |   2 +-
 .../loading/CarbonDataLoadConfiguration.java|  11 +
 .../loading/DataLoadProcessBuilder.java |  77 +++-
 .../loading/converter/RowConverter.java |   2 +-
 .../converter/impl/RowConverterImpl.java|   5 +
 .../loading/model/CarbonLoadModel.java  |  14 +
 .../loading/model/CarbonLoadModelBuilder.java   |   1 +
 .../processing/loading/model/LoadOption.java|   1 +
 .../partition/impl/HashPartitionerImpl.java |  10 +-
 .../partition/impl/RangePartitionerImpl.java|  71 
 .../partition/impl/RawRowComparator.java|  63 
 .../processing/loading/sort/SorterFactory.java  |  16 +-
 ...arallelReadMergeSorterWithBucketingImpl.java | 272 ---
 ...allelReadMergeSorterWithColumnRangeImpl.java | 289 +++
 ...arallelReadMergeSorterWithBucketingImpl.java | 263 --
 ...allelReadMergeSorterWithColumnRangeImpl.java | 293 
 .../loading/sort/unsafe/UnsafeSortDataRows.java |   6 +-
 .../unsafe/merger/UnsafeIntermediateMerger.java |   6 +-
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  11 +-
 .../steps/DataConverterProcessorStepImpl.java   | 102 +-
 ...ConverterProcessorWithBucketingStepImpl.java | 161 -
 .../steps/DataWriterProcessorStepImpl.java  |  70 +++-
 .../SingleThreadFinalSortFilesMerger.java   |   3 +-
 .../processing/sort/sortdata/SortDataRows.java  |  11 +-
 .../sortdata/SortIntermediateFileMerger.java|   6 +-
 .../sort/sortdata/SortParameters.java   |  10 +
 .../store/CarbonFactDataHandlerColumnar.java|   6 +-
 39 files changed, 1558 insertions(+), 750 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/faad967d/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..8ff8dc4 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -124,4 +124,14 @@ public final class CarbonLoadOptionConstants {
   public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
   = "carbon.load.skewedDataOptimization.enabled";
   public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
+
+  /**
+   * field delimiter for each field in one bound
+   */
+  public static final String SORT_COLUMN_BOUNDS_FIELD_DELIMITER = ",";
+
+  /**
+   * row delimiter for each sort column bounds
+   */
+  public static final String SORT_COLUMN_BOUNDS_ROW_DELIMITER = ";";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/faad967d/core/src/main/java/org/apache/carbondata/core/datastore/row/CarbonRow.java
--
diff --git 

[47/50] [abbrv] carbondata git commit: [REBASE] Solve conflict after merging master

2018-03-04 Thread jackylk
[REBASE] Solve conflict after merging master


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8104735f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8104735f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8104735f

Branch: refs/heads/carbonstore
Commit: 8104735fd66952a531153eb0d3b4db5c9ecc133d
Parents: ce88eb6
Author: Jacky Li 
Authored: Tue Feb 27 11:26:30 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:14 2018 +0800

--
 .../carbondata/core/datamap/dev/DataMap.java|   6 -
 .../core/datamap/dev/DataMapFactory.java|   2 +-
 .../exception/ConcurrentOperationException.java |  16 +-
 .../core/metadata/PartitionMapFileStore.java|   0
 .../statusmanager/SegmentStatusManager.java |  10 +-
 .../SegmentUpdateStatusManager.java |   1 -
 datamap/examples/pom.xml| 145 +++--
 .../datamap/examples/MinMaxDataWriter.java  |   1 -
 examples/flink/pom.xml  |   4 +-
 .../carbondata/examples/FlinkExample.scala  |  10 +-
 .../CarbonStreamSparkStreamingExample.scala |   1 -
 .../hadoop/api/CarbonTableInputFormat.java  |   5 +-
 .../TestInsertAndOtherCommandConcurrent.scala   |   2 +-
 .../StandardPartitionGlobalSortTestCase.scala   |   2 +-
 .../exception/ProcessMetaDataException.java |   2 +
 .../org/apache/carbondata/api/CarbonStore.scala |   6 +-
 .../carbondata/spark/load/CsvRDDHelper.scala| 157 +++
 .../load/DataLoadProcessBuilderOnSpark.scala|   3 +-
 .../carbondata/spark/util/CarbonScalaUtil.scala |   2 +-
 .../carbondata/spark/util/CommonUtil.scala  |   2 -
 .../command/carbonTableSchemaCommon.scala   |   6 +-
 .../CarbonAlterTableCompactionCommand.scala |   3 +-
 .../management/CarbonCleanFilesCommand.scala|   2 +-
 .../CarbonDeleteLoadByIdCommand.scala   |   2 +-
 .../CarbonDeleteLoadByLoadDateCommand.scala |   2 +-
 .../management/CarbonLoadDataCommand.scala  |  28 ++--
 .../CarbonProjectForDeleteCommand.scala |   2 +-
 .../CarbonProjectForUpdateCommand.scala |   2 +-
 .../schema/CarbonAlterTableRenameCommand.scala  |   2 +-
 .../command/table/CarbonDropTableCommand.scala  |   2 +-
 .../datasources/CarbonFileFormat.scala  |   3 -
 .../vectorreader/AddColumnTestCases.scala   |   1 +
 .../datamap/DataMapWriterListener.java  |   3 +-
 .../loading/model/CarbonLoadModelBuilder.java   |  34 +++-
 .../processing/loading/model/LoadOption.java|  15 +-
 .../processing/merger/CarbonDataMergerUtil.java |   3 +-
 .../util/CarbonDataProcessorUtil.java   |   3 +-
 .../processing/util/CarbonLoaderUtil.java   |   8 +
 store/sdk/pom.xml   |   2 +-
 .../carbondata/sdk/file/CSVCarbonWriter.java|   8 +-
 40 files changed, 336 insertions(+), 172 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8104735f/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java
index 02db8af..dd5507c 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java
@@ -38,9 +38,6 @@ public interface DataMap {
   /**
* Prune the datamap with filter expression and partition information. It 
returns the list of
* blocklets where these filters can exist.
-   *
-   * @param filterExp
-   * @return
*/
   List prune(FilterResolverIntf filterExp, SegmentProperties 
segmentProperties,
   List partitions);
@@ -48,9 +45,6 @@ public interface DataMap {
   // TODO Move this method to Abstract class
   /**
* Validate whether the current segment needs to be fetching the required 
data
-   *
-   * @param filterExp
-   * @return
*/
   boolean isScanRequired(FilterResolverIntf filterExp);
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8104735f/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
index 50ac279..d8a467f 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
@@ -21,8 +21,8 @@ import java.util.List;
 
 import 

[43/50] [abbrv] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/dcfe73b8/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
index 8d394db..e69de29 100644
--- 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
+++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
@@ -1,610 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.spark.util
-
-import java.text.SimpleDateFormat
-import java.util
-import java.util.{Date, List, Locale}
-
-import scala.collection.{immutable, mutable}
-import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
-
-import org.apache.commons.lang3.StringUtils
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.mapred.JobConf
-import org.apache.hadoop.mapreduce.{TaskAttemptID, TaskType}
-import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
-import org.apache.hadoop.mapreduce.task.{JobContextImpl, 
TaskAttemptContextImpl}
-import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
-import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, 
PartitionedFile}
-import org.apache.spark.sql.util.CarbonException
-import org.apache.spark.sql.util.SparkSQLUtil.sessionState
-
-import org.apache.carbondata.common.constants.LoggerAction
-import org.apache.carbondata.common.logging.{LogService, LogServiceFactory}
-import org.apache.carbondata.core.constants.{CarbonCommonConstants, 
CarbonLoadOptionConstants}
-import org.apache.carbondata.core.indexstore.PartitionSpec
-import org.apache.carbondata.core.locks.{CarbonLockFactory, CarbonLockUtil, 
LockUsage}
-import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
-import org.apache.carbondata.core.metadata.schema.table.CarbonTable
-import org.apache.carbondata.core.statusmanager.{LoadMetadataDetails, 
SegmentStatus, SegmentStatusManager}
-import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
-import 
org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants
-import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat
-import org.apache.carbondata.processing.loading.model.{CarbonDataLoadSchema, 
CarbonLoadModel}
-import org.apache.carbondata.processing.util.{CarbonLoaderUtil, 
DeleteLoadFolders, TableOptionConstant}
-import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
-import org.apache.carbondata.spark.load.DataLoadProcessBuilderOnSpark.LOGGER
-import org.apache.carbondata.spark.load.ValidateUtil
-import org.apache.carbondata.spark.rdd.SerializableConfiguration
-
-/**
- * the util object of data loading
- */
-object DataLoadingUtil {
-
-  val LOGGER: LogService = 
LogServiceFactory.getLogService(this.getClass.getCanonicalName)
-
-  /**
-   * get data loading options and initialise default value
-   */
-  def getDataLoadingOptions(
-  carbonProperty: CarbonProperties,
-  options: immutable.Map[String, String]): mutable.Map[String, String] = {
-val optionsFinal = scala.collection.mutable.Map[String, String]()
-optionsFinal.put("delimiter", options.getOrElse("delimiter", ","))
-optionsFinal.put("quotechar", options.getOrElse("quotechar", "\""))
-optionsFinal.put("fileheader", options.getOrElse("fileheader", ""))
-optionsFinal.put("commentchar", options.getOrElse("commentchar", "#"))
-optionsFinal.put("columndict", options.getOrElse("columndict", null))
-
-optionsFinal.put("escapechar",
-  CarbonLoaderUtil.getEscapeChar(options.getOrElse("escapechar", "\\")))
-
-optionsFinal.put(
-  "serialization_null_format",
-  

[41/50] [abbrv] carbondata git commit: [CARBONDATA-1997] Add CarbonWriter SDK API

2018-03-04 Thread jackylk
[CARBONDATA-1997] Add CarbonWriter SDK API

Added a new module called store-sdk, and added a CarbonWriter API, it can be 
used to write Carbondata files to a specified folder, without Spark and Hadoop 
dependency. User can use this API in any environment.

This closes #1967


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5fccdabf
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5fccdabf
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5fccdabf

Branch: refs/heads/carbonstore
Commit: 5fccdabfc1cc4656d75e51867dcfcb250c505c91
Parents: fc31be7
Author: Jacky Li 
Authored: Sat Feb 10 19:44:23 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../org/apache/carbondata/common/Strings.java   |  40 
 .../apache/carbondata/common/StringsSuite.java  |  53 +
 .../core/metadata/schema/table/CarbonTable.java |   7 +
 .../schema/table/CarbonTableBuilder.java|  72 +++
 .../core/metadata/schema/table/TableSchema.java |   7 +
 .../schema/table/TableSchemaBuilder.java| 107 ++
 .../schema/table/CarbonTableBuilderSuite.java   |  86 
 .../metadata/schema/table/CarbonTableTest.java  |  12 +-
 .../schema/table/TableSchemaBuilderSuite.java   |  56 ++
 .../carbondata/spark/util/DataLoadingUtil.scala |  45 +
 pom.xml |   7 +
 store/sdk/pom.xml   | 130 +
 .../carbondata/sdk/file/CSVCarbonWriter.java|  89 +
 .../carbondata/sdk/file/CarbonWriter.java   |  51 +
 .../sdk/file/CarbonWriterBuilder.java   | 194 +++
 .../org/apache/carbondata/sdk/file/Field.java   |  74 +++
 .../org/apache/carbondata/sdk/file/Schema.java  |  74 +++
 .../sdk/file/CSVCarbonWriterSuite.java  | 127 
 18 files changed, 1225 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/5fccdabf/common/src/main/java/org/apache/carbondata/common/Strings.java
--
diff --git a/common/src/main/java/org/apache/carbondata/common/Strings.java 
b/common/src/main/java/org/apache/carbondata/common/Strings.java
new file mode 100644
index 000..23288dd
--- /dev/null
+++ b/common/src/main/java/org/apache/carbondata/common/Strings.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.common;
+
+import java.util.Objects;
+
+public class Strings {
+
+  /**
+   * Provide same function as mkString in Scala.
+   * This is added to avoid JDK 8 dependency.
+   */
+  public static String mkString(String[] strings, String delimeter) {
+Objects.requireNonNull(strings);
+Objects.requireNonNull(delimeter);
+StringBuilder builder = new StringBuilder();
+for (int i = 0; i < strings.length; i++) {
+  builder.append(strings[i]);
+  if (i != strings.length - 1) {
+builder.append(delimeter);
+  }
+}
+return builder.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/5fccdabf/common/src/test/java/org/apache/carbondata/common/StringsSuite.java
--
diff --git 
a/common/src/test/java/org/apache/carbondata/common/StringsSuite.java 
b/common/src/test/java/org/apache/carbondata/common/StringsSuite.java
new file mode 100644
index 000..65da32b
--- /dev/null
+++ b/common/src/test/java/org/apache/carbondata/common/StringsSuite.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at

[02/50] [abbrv] carbondata git commit: [CARBONDATA-1968] Add external table support

2018-03-04 Thread jackylk
[CARBONDATA-1968] Add external table support

This PR adds support for creating external table with existing carbondata 
files, using Hive syntax.
CREATE EXTERNAL TABLE tableName STORED BY 'carbondata' LOCATION 'path'

This closes #1749


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0c75ab73
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0c75ab73
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0c75ab73

Branch: refs/heads/carbonstore
Commit: 0c75ab7359ad89a16f749e84bd42416523d5255a
Parents: 5663e91
Author: Jacky Li 
Authored: Tue Jan 2 23:46:14 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:30:31 2018 +0800

--
 .../core/metadata/schema/table/CarbonTable.java |  9 ++
 .../createTable/TestCreateExternalTable.scala   | 91 
 .../TestDataWithDicExcludeAndInclude.scala  | 10 ---
 .../command/table/CarbonDropTableCommand.scala  |  5 +-
 .../spark/sql/parser/CarbonSparkSqlParser.scala | 64 +-
 5 files changed, 147 insertions(+), 32 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0c75ab73/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index 09ff440..6036569 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -785,6 +785,15 @@ public class CarbonTable implements Serializable {
 && !tableInfo.getParentRelationIdentifiers().isEmpty();
   }
 
+  /**
+   * Return true if this is an external table (table with property 
"_external"="true", this is
+   * an internal table property set during table creation)
+   */
+  public boolean isExternalTable() {
+String external = 
tableInfo.getFactTable().getTableProperties().get("_external");
+return external != null && external.equalsIgnoreCase("true");
+  }
+
   public long size() throws IOException {
 Map dataIndexSize = CarbonUtil.calculateDataIndexSize(this);
 Long dataSize = 
dataIndexSize.get(CarbonCommonConstants.CARBON_TOTAL_DATA_SIZE);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/0c75ab73/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
new file mode 100644
index 000..67370eb
--- /dev/null
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.createTable
+
+import java.io.File
+
+import org.apache.spark.sql.{AnalysisException, CarbonEnv}
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+class TestCreateExternalTable extends QueryTest with BeforeAndAfterAll {
+
+  var originDataPath: String = _
+
+  override def beforeAll(): Unit = {
+sql("DROP TABLE IF EXISTS origin")
+// create carbon table and insert data
+sql("CREATE TABLE origin(key INT, value STRING) STORED BY 'carbondata'")
+sql("INSERT INTO origin select 100,'spark'")
+sql("INSERT INTO origin select 200,'hive'")
+originDataPath = s"$storeLocation/origin"
+  }
+
+  override def afterAll(): Unit = {
+sql("DROP TABLE IF EXISTS origin")
+  }
+
+  test("create 

[12/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
index de97e82..540607d 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
@@ -34,8 +34,8 @@ import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.cache.dictionary.Dictionary;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
-import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
-import 
org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk;
+import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
+import 
org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
 import org.apache.carbondata.core.keygenerator.KeyGenException;
 import 
org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
@@ -58,7 +58,7 @@ import org.apache.carbondata.core.scan.filter.intf.RowImpl;
 import org.apache.carbondata.core.scan.filter.intf.RowIntf;
 import 
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
 import 
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
-import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
+import org.apache.carbondata.core.scan.processor.RawBlockletColumnChunks;
 import org.apache.carbondata.core.util.BitSetGroup;
 import org.apache.carbondata.core.util.ByteUtil;
 import org.apache.carbondata.core.util.CarbonUtil;
@@ -68,20 +68,20 @@ public class RowLevelFilterExecuterImpl implements 
FilterExecuter {
 
   private static final LogService LOGGER =
   
LogServiceFactory.getLogService(RowLevelFilterExecuterImpl.class.getName());
-  protected List dimColEvaluatorInfoList;
-  protected List msrColEvalutorInfoList;
+  List dimColEvaluatorInfoList;
+  List msrColEvalutorInfoList;
   protected Expression exp;
   protected AbsoluteTableIdentifier tableIdentifier;
   protected SegmentProperties segmentProperties;
   /**
* it has index at which given dimension is stored in file
*/
-  protected int[] dimensionBlocksIndex;
+  int[] dimensionChunkIndex;
 
   /**
* it has index at which given measure is stored in file
*/
-  protected int[] measureBlocksIndex;
+  int[] measureChunkIndex;
 
   private Map complexDimensionInfoMap;
 
@@ -89,18 +89,18 @@ public class RowLevelFilterExecuterImpl implements 
FilterExecuter {
* flag to check whether the filter dimension is present in current block 
list of dimensions.
* Applicable for restructure scenarios
*/
-  protected boolean[] isDimensionPresentInCurrentBlock;
+  boolean[] isDimensionPresentInCurrentBlock;
 
   /**
* flag to check whether the filter measure is present in current block list 
of measures.
* Applicable for restructure scenarios
*/
-  protected boolean[] isMeasurePresentInCurrentBlock;
+  boolean[] isMeasurePresentInCurrentBlock;
 
   /**
* is dimension column data is natural sorted
*/
-  protected boolean isNaturalSorted;
+  boolean isNaturalSorted;
 
   /**
* date direct dictionary generator
@@ -124,10 +124,10 @@ public class RowLevelFilterExecuterImpl implements 
FilterExecuter {
 }
 if (this.dimColEvaluatorInfoList.size() > 0) {
   this.isDimensionPresentInCurrentBlock = new 
boolean[dimColEvaluatorInfoList.size()];
-  this.dimensionBlocksIndex = new int[dimColEvaluatorInfoList.size()];
+  this.dimensionChunkIndex = new int[dimColEvaluatorInfoList.size()];
 } else {
   this.isDimensionPresentInCurrentBlock = new boolean[]{false};
-  this.dimensionBlocksIndex = new int[]{0};
+  this.dimensionChunkIndex = new int[]{0};
 }
 if (null == msrColEvalutorInfoList) {
   this.msrColEvalutorInfoList = new 
ArrayList(20);
@@ -136,10 +136,10 @@ public class RowLevelFilterExecuterImpl implements 
FilterExecuter {
 }
 if (this.msrColEvalutorInfoList.size() > 0) {
   this.isMeasurePresentInCurrentBlock = new 
boolean[msrColEvalutorInfoList.size()];
-  this.measureBlocksIndex = new int[msrColEvalutorInfoList.size()];
+  this.measureChunkIndex = new int[msrColEvalutorInfoList.size()];
 } else {
   this.isMeasurePresentInCurrentBlock = new boolean[]{false};
-  this.measureBlocksIndex = new int[] {0};

[42/50] [abbrv] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/dcfe73b8/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
new file mode 100644
index 000..fbb93b6
--- /dev/null
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.processing.loading.model;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.carbondata.common.Maps;
+import org.apache.carbondata.common.Strings;
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import org.apache.carbondata.common.constants.LoggerAction;
+import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonProperties;
+import org.apache.carbondata.core.util.CarbonUtil;
+import 
org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants;
+import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat;
+import org.apache.carbondata.processing.loading.sort.SortScopeOptions;
+import org.apache.carbondata.processing.util.TableOptionConstant;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Builder for {@link CarbonLoadModel}
+ */
+@InterfaceAudience.Developer
+public class CarbonLoadModelBuilder {
+
+  private CarbonTable table;
+
+  public CarbonLoadModelBuilder(CarbonTable table) {
+this.table = table;
+  }
+
+  /**
+   * build CarbonLoadModel for data loading
+   * @param options Load options from user input
+   * @return a new CarbonLoadModel instance
+   */
+  public CarbonLoadModel build(
+  Map options) throws InvalidLoadOptionException, 
IOException {
+Map optionsFinal = 
LoadOption.fillOptionWithDefaultValue(options);
+optionsFinal.put("sort_scope", "no_sort");
+if (!options.containsKey("fileheader")) {
+  List csvHeader = 
table.getCreateOrderColumn(table.getTableName());
+  String[] columns = new String[csvHeader.size()];
+  for (int i = 0; i < columns.length; i++) {
+columns[i] = csvHeader.get(i).getColName();
+  }
+  optionsFinal.put("fileheader", Strings.mkString(columns, ","));
+}
+CarbonLoadModel model = new CarbonLoadModel();
+
+// we have provided 'fileheader', so it hadoopConf can be null
+build(options, optionsFinal, model, null);
+
+// set default values
+
model.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
+model.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
+model.setUseOnePass(Boolean.parseBoolean(Maps.getOrDefault(options, 
"onepass", "false")));
+model.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost", 
null));
+try {
+  
model.setDictionaryServerPort(Integer.parseInt(Maps.getOrDefault(options, 
"dictport", "-1")));
+} catch (NumberFormatException e) {
+  throw new InvalidLoadOptionException(e.getMessage());
+}
+return model;
+  }
+
+  /**
+   * build CarbonLoadModel for data loading
+   * @param options Load options from user input
+   * @param optionsFinal Load options that populated with default values for 
optional options
+   * @param carbonLoadModel The output load model
+   * @param hadoopConf hadoopConf is needed to read CSV header if there 
'fileheader' is not set in
+   *   user provided load options
+   */
+  public void build(
+  Map options,
+  Map optionsFinal,
+  CarbonLoadModel 

[45/50] [abbrv] carbondata git commit: [CARBONDATA-2091][DataLoad] Support specifying sort column bounds in data loading

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/faad967d/processing/src/main/java/org/apache/carbondata/processing/loading/sort/impl/UnsafeParallelReadMergeSorterWithBucketingImpl.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/impl/UnsafeParallelReadMergeSorterWithBucketingImpl.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/impl/UnsafeParallelReadMergeSorterWithBucketingImpl.java
deleted file mode 100644
index f605b22..000
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/impl/UnsafeParallelReadMergeSorterWithBucketingImpl.java
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.processing.loading.sort.impl;
-
-import java.io.File;
-import java.util.Iterator;
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.carbondata.common.CarbonIterator;
-import org.apache.carbondata.common.logging.LogService;
-import org.apache.carbondata.common.logging.LogServiceFactory;
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import org.apache.carbondata.core.datastore.row.CarbonRow;
-import org.apache.carbondata.core.memory.MemoryException;
-import org.apache.carbondata.core.metadata.schema.BucketingInfo;
-import org.apache.carbondata.core.util.CarbonProperties;
-import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory;
-import org.apache.carbondata.processing.loading.DataField;
-import 
org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException;
-import org.apache.carbondata.processing.loading.row.CarbonRowBatch;
-import org.apache.carbondata.processing.loading.sort.AbstractMergeSorter;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage;
-import org.apache.carbondata.processing.loading.sort.unsafe.UnsafeSortDataRows;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.merger.UnsafeIntermediateMerger;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.merger.UnsafeSingleThreadFinalSortFilesMerger;
-import org.apache.carbondata.processing.sort.sortdata.SortParameters;
-import org.apache.carbondata.processing.util.CarbonDataProcessorUtil;
-
-/**
- * It parallely reads data from array of iterates and do merge sort.
- * First it sorts the data and write to temp files. These temp files will be 
merge sorted to get
- * final merge sort result.
- * This step is specifically for bucketing, it sorts each bucket data 
separately and write to
- * temp files.
- */
-public class UnsafeParallelReadMergeSorterWithBucketingImpl extends 
AbstractMergeSorter {
-
-  private static final LogService LOGGER =
-  LogServiceFactory.getLogService(
-
UnsafeParallelReadMergeSorterWithBucketingImpl.class.getName());
-
-  private SortParameters sortParameters;
-
-  private BucketingInfo bucketingInfo;
-
-  public UnsafeParallelReadMergeSorterWithBucketingImpl(DataField[] 
inputDataFields,
-  BucketingInfo bucketingInfo) {
-this.bucketingInfo = bucketingInfo;
-  }
-
-  @Override public void initialize(SortParameters sortParameters) {
-this.sortParameters = sortParameters;
-  }
-
-  @Override public Iterator[] sort(Iterator[] 
iterators)
-  throws CarbonDataLoadingException {
-UnsafeSortDataRows[] sortDataRows = new 
UnsafeSortDataRows[bucketingInfo.getNumberOfBuckets()];
-UnsafeIntermediateMerger[] intermediateFileMergers =
-new UnsafeIntermediateMerger[sortDataRows.length];
-int inMemoryChunkSizeInMB = 
CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
-inMemoryChunkSizeInMB = inMemoryChunkSizeInMB / 
bucketingInfo.getNumberOfBuckets();
-if (inMemoryChunkSizeInMB < 5) {
-  inMemoryChunkSizeInMB = 5;
-}
-try {
-  for (int i = 0; i < bucketingInfo.getNumberOfBuckets(); i++) {
-SortParameters parameters = sortParameters.getCopy();
-parameters.setPartitionID(i + "");
-setTempLocation(parameters);
-

[07/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
--
diff --git 
a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java 
b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
index f4450e3..5f8d199 100644
--- a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
@@ -31,7 +31,7 @@ import java.util.Map;
 
 import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.datastore.block.TableBlockInfo;
-import 
org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk;
+import 
org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage;
 import org.apache.carbondata.core.datastore.columnar.ColumnGroupModel;
 import org.apache.carbondata.core.datastore.filesystem.LocalCarbonFile;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
@@ -45,7 +45,7 @@ import org.apache.carbondata.core.metadata.encoder.Encoding;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
 import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
-import org.apache.carbondata.core.scan.model.QueryDimension;
+import org.apache.carbondata.core.scan.model.ProjectionDimension;
 
 import mockit.Mock;
 import mockit.MockUp;
@@ -267,8 +267,8 @@ public class CarbonUtilTest {
   @Test public void testToGetNextLesserValue() {
 byte[] dataChunks = { 5, 6, 7, 8, 9 };
 byte[] compareValues = { 7 };
-FixedLengthDimensionDataChunk fixedLengthDataChunk =
-new FixedLengthDimensionDataChunk(dataChunks, null, null, 5, 1);
+FixedLengthDimensionColumnPage fixedLengthDataChunk =
+new FixedLengthDimensionColumnPage(dataChunks, null, null, 5, 1);
 int result = CarbonUtil.nextLesserValueToTarget(2, fixedLengthDataChunk, 
compareValues);
 assertEquals(result, 1);
   }
@@ -276,8 +276,8 @@ public class CarbonUtilTest {
   @Test public void testToGetNextLesserValueToTarget() {
 byte[] dataChunks = { 7, 7, 7, 8, 9 };
 byte[] compareValues = { 7 };
-FixedLengthDimensionDataChunk fixedLengthDataChunk =
-new FixedLengthDimensionDataChunk(dataChunks, null, null, 5, 1);
+FixedLengthDimensionColumnPage fixedLengthDataChunk =
+new FixedLengthDimensionColumnPage(dataChunks, null, null, 5, 1);
 int result = CarbonUtil.nextLesserValueToTarget(2, fixedLengthDataChunk, 
compareValues);
 assertEquals(result, -1);
   }
@@ -285,8 +285,8 @@ public class CarbonUtilTest {
   @Test public void testToGetnextGreaterValue() {
 byte[] dataChunks = { 5, 6, 7, 8, 9 };
 byte[] compareValues = { 7 };
-FixedLengthDimensionDataChunk fixedLengthDataChunk =
-new FixedLengthDimensionDataChunk(dataChunks, null, null, 5, 1);
+FixedLengthDimensionColumnPage fixedLengthDataChunk =
+new FixedLengthDimensionColumnPage(dataChunks, null, null, 5, 1);
 int result = CarbonUtil.nextGreaterValueToTarget(2, fixedLengthDataChunk, 
compareValues, 5);
 assertEquals(result, 3);
   }
@@ -302,8 +302,8 @@ public class CarbonUtilTest {
   @Test public void testToGetnextGreaterValueToTarget() {
 byte[] dataChunks = { 5, 6, 7, 7, 7 };
 byte[] compareValues = { 7 };
-FixedLengthDimensionDataChunk fixedLengthDataChunk =
-new FixedLengthDimensionDataChunk(dataChunks, null, null, 5, 1);
+FixedLengthDimensionColumnPage fixedLengthDataChunk =
+new FixedLengthDimensionColumnPage(dataChunks, null, null, 5, 1);
 int result = CarbonUtil.nextGreaterValueToTarget(2, fixedLengthDataChunk, 
compareValues, 5);
 assertEquals(result, 5);
   }
@@ -525,23 +525,23 @@ public class CarbonUtilTest {
   }
 
   @Test public void testToGetDictionaryEncodingArray() {
-QueryDimension column1 = new QueryDimension("Column1");
-QueryDimension column2 = new QueryDimension("Column2");
 ColumnSchema column1Schema = new ColumnSchema();
 ColumnSchema column2Schema = new ColumnSchema();
 column1Schema.setColumnName("Column1");
 List encoding = new ArrayList<>();
 encoding.add(Encoding.DICTIONARY);
 column1Schema.setEncodingList(encoding);
-column1.setDimension(new CarbonDimension(column1Schema, 1, 1, 1, 1));
+ProjectionDimension
+column1 = new ProjectionDimension(new CarbonDimension(column1Schema, 
1, 1, 1, 1));
 
 column2Schema.setColumnName("Column2");
 List encoding2 = new ArrayList<>();
 encoding2.add(Encoding.DELTA);
 column2Schema.setEncodingList(encoding2);
-column2.setDimension(new CarbonDimension(column2Schema, 1, 1, 1, 1));
+ProjectionDimension
+column2 = new ProjectionDimension(new CarbonDimension(column2Schema, 
1, 1, 1, 1));
 
-

[14/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
index 69f5ceb..22d1df1 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
@@ -43,10 +43,9 @@ import 
org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.block.TableBlockInfo;
 import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
 import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
-import 
org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNodeWrapper;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNode;
 import org.apache.carbondata.core.indexstore.blockletindex.IndexWrapper;
 import org.apache.carbondata.core.keygenerator.KeyGenException;
-import org.apache.carbondata.core.keygenerator.KeyGenerator;
 import org.apache.carbondata.core.memory.UnsafeMemoryManager;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
 import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
@@ -64,8 +63,8 @@ import 
org.apache.carbondata.core.scan.executor.util.RestructureUtil;
 import org.apache.carbondata.core.scan.filter.FilterUtil;
 import org.apache.carbondata.core.scan.filter.SingleTableProvider;
 import org.apache.carbondata.core.scan.filter.TableProvider;
-import org.apache.carbondata.core.scan.model.QueryDimension;
-import org.apache.carbondata.core.scan.model.QueryMeasure;
+import org.apache.carbondata.core.scan.model.ProjectionDimension;
+import org.apache.carbondata.core.scan.model.ProjectionMeasure;
 import org.apache.carbondata.core.scan.model.QueryModel;
 import org.apache.carbondata.core.stats.QueryStatistic;
 import org.apache.carbondata.core.stats.QueryStatisticsConstants;
@@ -121,7 +120,6 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 queryProperties.queryStatisticsRecorder =
 
CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId());
 queryModel.setStatisticsRecorder(queryProperties.queryStatisticsRecorder);
-QueryUtil.resolveQueryModel(queryModel);
 QueryStatistic queryStatistic = new QueryStatistic();
 // sort the block info
 // so block will be loaded in sorted order this will be required for
@@ -168,12 +166,12 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, 
System.currentTimeMillis());
 queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic);
 // calculating the total number of aggregated columns
-int measureCount = queryModel.getQueryMeasures().size();
+int measureCount = queryModel.getProjectionMeasures().size();
 
 int currentIndex = 0;
 DataType[] dataTypes = new DataType[measureCount];
 
-for (QueryMeasure carbonMeasure : queryModel.getQueryMeasures()) {
+for (ProjectionMeasure carbonMeasure : queryModel.getProjectionMeasures()) 
{
   // adding the data type and aggregation type of all the measure this
   // can be used
   // to select the aggregator
@@ -198,9 +196,11 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 queryStatistic = new QueryStatistic();
 // dictionary column unique column id to dictionary mapping
 // which will be used to get column actual data
-queryProperties.columnToDictionayMapping = QueryUtil
-.getDimensionDictionaryDetail(queryModel.getQueryDimension(),
-queryProperties.complexFilterDimension, 
queryModel.getAbsoluteTableIdentifier(),
+queryProperties.columnToDictionayMapping =
+QueryUtil.getDimensionDictionaryDetail(
+queryModel.getProjectionDimensions(),
+queryProperties.complexFilterDimension,
+queryModel.getAbsoluteTableIdentifier(),
 tableProvider);
 queryStatistic
 .addStatistics(QueryStatisticsConstants.LOAD_DICTIONARY, 
System.currentTimeMillis());
@@ -263,8 +263,8 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 // and query will be executed based on that infos
 for (int i = 0; i < queryProperties.dataBlocks.size(); i++) {
   AbstractIndex abstractIndex = queryProperties.dataBlocks.get(i);
-  BlockletDataRefNodeWrapper dataRefNode =
-  (BlockletDataRefNodeWrapper) abstractIndex.getDataRefNode();
+  BlockletDataRefNode dataRefNode =
+  (BlockletDataRefNode) abstractIndex.getDataRefNode();
   

[16/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
index 8c8d08f..a689d8e 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
@@ -124,22 +124,22 @@ public class UnsafeFixedLengthDimensionDataChunkStore
   /**
* to compare the two byte array
*
-   * @param indexindex of first byte array
+   * @param rowIdindex of first byte array
* @param compareValue value of to be compared
* @return compare result
*/
-  @Override public int compareTo(int index, byte[] compareValue) {
+  @Override public int compareTo(int rowId, byte[] compareValue) {
 // based on index we need to calculate the actual position in memory block
-index = index * columnValueSize;
+rowId = rowId * columnValueSize;
 int compareResult = 0;
 for (int i = 0; i < compareValue.length; i++) {
   compareResult = (CarbonUnsafe.getUnsafe()
-  .getByte(dataPageMemoryBlock.getBaseObject(), 
dataPageMemoryBlock.getBaseOffset() + index)
+  .getByte(dataPageMemoryBlock.getBaseObject(), 
dataPageMemoryBlock.getBaseOffset() + rowId)
   & 0xff) - (compareValue[i] & 0xff);
   if (compareResult != 0) {
 break;
   }
-  index++;
+  rowId++;
 }
 return compareResult;
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
index 36b2bd8..e1eb378 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
@@ -189,11 +189,11 @@ public class UnsafeVariableLengthDimesionDataChunkStore
   /**
* to compare the two byte array
*
-   * @param index index of first byte array
+   * @param rowId index of first byte array
* @param compareValue value of to be compared
* @return compare result
*/
-  @Override public int compareTo(int index, byte[] compareValue) {
+  @Override public int compareTo(int rowId, byte[] compareValue) {
 // now to get the row from memory block we need to do following thing
 // 1. first get the current offset
 // 2. if it's not a last row- get the next row offset
@@ -201,13 +201,13 @@ public class UnsafeVariableLengthDimesionDataChunkStore
 // else subtract the current row offset
 // with complete data length get the offset of set of data
 int currentDataOffset = 
CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
-dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + 
((long)index
+dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + 
((long) rowId
 * CarbonCommonConstants.INT_SIZE_IN_BYTE * 1L));
 short length = 0;
 // calculating the length of data
-if (index < numberOfRows - 1) {
+if (rowId < numberOfRows - 1) {
   int OffsetOfNextdata = 
CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
-  dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + 
((index + 1)
+  dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + 
((rowId + 1)
   * CarbonCommonConstants.INT_SIZE_IN_BYTE));
   length = (short) (OffsetOfNextdata - (currentDataOffset
   + CarbonCommonConstants.SHORT_SIZE_IN_BYTE));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ColumnGroupModel.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ColumnGroupModel.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ColumnGroupModel.java
index 74d268a..e2a4161 100644
--- 

[34/50] [abbrv] carbondata git commit: [CARBONDATA-2023][DataLoad] Add size-based block allocation in data loading

2018-03-04 Thread jackylk
[CARBONDATA-2023][DataLoad] Add size-based block allocation in data loading

Carbondata assigns blocks to nodes at the beginning of data loading.
The previous block allocation strategy was based on block count, and it
suffers from a skewed-data problem when the sizes of the input files differ a lot.

We introduced a size-based block allocation strategy to optimize data
loading performance in skewed-data scenarios.

This closes #1808


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8fe8ab4c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8fe8ab4c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8fe8ab4c

Branch: refs/heads/carbonstore
Commit: 8fe8ab4c078de0ccd218f8ba41352896aebd5202
Parents: 28b5720
Author: xuchuanyin 
Authored: Thu Feb 8 14:42:39 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 +
 .../core/datastore/block/TableBlockInfo.java|  29 ++
 .../carbondata/core/util/CarbonProperties.java  |  11 +
 docs/useful-tips-on-carbondata.md   |   1 +
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 ++
 .../processing/util/CarbonLoaderUtil.java   | 480 ---
 .../processing/util/CarbonLoaderUtilTest.java   | 125 +
 10 files changed, 545 insertions(+), 175 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8fe8ab4c/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index bcfeba0..a6bf60f 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,4 +114,14 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
+  /**
+   * enable block size based block allocation while loading data. By default, 
carbondata assigns
+   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
+   * consider block size first and make sure that all the nodes will process 
almost equal size of
+   * data. This option is especially useful when you encounter skewed data.
+   */
+  @CarbonProperty
+  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
+  = "carbon.load.skewedDataOptimization.enabled";
+  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8fe8ab4c/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index a7bfdba..c0cebe0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,6 +19,8 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -98,6 +100,20 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
+  /**
+   * comparator to sort by block size in descending order.
+   * Since each line is not exactly the same, the size of an InputSplit may 
differ,
+   * so we allow some deviation for these splits.
+   */
+  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
+  new Comparator() {
+@Override public int compare(Distributable o1, Distributable o2) {
+  long diff =
+  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
+  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
+}
+  };
+
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -434,4 +450,17 @@ public class TableBlockInfo 

[03/50] [abbrv] carbondata git commit: [CARBONDATA-1992] Remove partitionId in CarbonTablePath

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/5663e916/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
index f030d52..369c1f2 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
@@ -24,6 +24,7 @@ import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.datastore.row.CarbonRow;
 import org.apache.carbondata.core.metadata.CarbonTableIdentifier;
 import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.processing.loading.AbstractDataLoadProcessorStep;
 import org.apache.carbondata.processing.loading.CarbonDataLoadConfiguration;
 import org.apache.carbondata.processing.loading.DataField;
@@ -59,13 +60,11 @@ public class DataWriterBatchProcessorStepImpl extends 
AbstractDataLoadProcessorS
 child.initialize();
   }
 
-  private String[] getStoreLocation(CarbonTableIdentifier tableIdentifier, 
String partitionId) {
-String[] storeLocation = CarbonDataProcessorUtil
-.getLocalDataFolderLocation(tableIdentifier.getDatabaseName(),
-tableIdentifier.getTableName(), 
String.valueOf(configuration.getTaskNo()), partitionId,
-configuration.getSegmentId() + "", false, false);
-CarbonDataProcessorUtil.createLocations(storeLocation);
-return storeLocation;
+  private String[] getStoreLocation(CarbonTableIdentifier tableIdentifier) {
+return CarbonDataProcessorUtil.getLocalDataFolderLocation(
+tableIdentifier.getDatabaseName(), tableIdentifier.getTableName(),
+String.valueOf(configuration.getTaskNo()),
+configuration.getSegmentId(), false, false);
   }
 
   @Override public Iterator[] execute() throws 
CarbonDataLoadingException {
@@ -75,18 +74,19 @@ public class DataWriterBatchProcessorStepImpl extends 
AbstractDataLoadProcessorS
 String tableName = tableIdentifier.getTableName();
 try {
   CarbonTimeStatisticsFactory.getLoadStatisticsInstance()
-  
.recordDictionaryValue2MdkAdd2FileTime(configuration.getPartitionId(),
+  
.recordDictionaryValue2MdkAdd2FileTime(CarbonTablePath.DEPRECATED_PATITION_ID,
   System.currentTimeMillis());
   int i = 0;
+  String[] storeLocation = getStoreLocation(tableIdentifier);
+  CarbonDataProcessorUtil.createLocations(storeLocation);
   for (Iterator iterator : iterators) {
-String[] storeLocation = getStoreLocation(tableIdentifier, 
String.valueOf(i));
 int k = 0;
 while (iterator.hasNext()) {
   CarbonRowBatch next = iterator.next();
   // If no rows from merge sorter, then don't create a file in fact 
column handler
   if (next.hasNext()) {
 CarbonFactDataHandlerModel model = CarbonFactDataHandlerModel
-.createCarbonFactDataHandlerModel(configuration, 
storeLocation, i, k++);
+.createCarbonFactDataHandlerModel(configuration, 
storeLocation, 0, k++);
 CarbonFactHandler dataHandler = CarbonFactHandlerFactory
 .createCarbonFactHandler(model, 
CarbonFactHandlerFactory.FactHandlerType.COLUMNAR);
 dataHandler.initialise();
@@ -119,10 +119,11 @@ public class DataWriterBatchProcessorStepImpl extends 
AbstractDataLoadProcessorS
 
CarbonTimeStatisticsFactory.getLoadStatisticsInstance().recordTotalRecords(rowCounter.get());
 processingComplete(dataHandler);
 CarbonTimeStatisticsFactory.getLoadStatisticsInstance()
-.recordDictionaryValue2MdkAdd2FileTime(configuration.getPartitionId(),
+
.recordDictionaryValue2MdkAdd2FileTime(CarbonTablePath.DEPRECATED_PATITION_ID,
 System.currentTimeMillis());
 CarbonTimeStatisticsFactory.getLoadStatisticsInstance()
-.recordMdkGenerateTotalTime(configuration.getPartitionId(), 
System.currentTimeMillis());
+.recordMdkGenerateTotalTime(CarbonTablePath.DEPRECATED_PATITION_ID,
+System.currentTimeMillis());
   }
 
   private void processingComplete(CarbonFactHandler dataHandler) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/5663e916/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterProcessorStepImpl.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterProcessorStepImpl.java
 

[04/50] [abbrv] carbondata git commit: [CARBONDATA-1992] Remove partitionId in CarbonTablePath

2018-03-04 Thread jackylk
[CARBONDATA-1992] Remove partitionId in CarbonTablePath

In CarbonTablePath, there is a deprecated partition id which is always 0; it 
should be removed to avoid confusion.

This closes #1765


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5663e916
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5663e916
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5663e916

Branch: refs/heads/carbonstore
Commit: 5663e916fe906675ce8efa320de1ed550315dc00
Parents: 9f2884a
Author: Jacky Li 
Authored: Sat Jan 6 20:28:44 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:30:31 2018 +0800

--
 .../core/metadata/PartitionMapFileStore.java|   0
 .../core/mutate/CarbonUpdateUtil.java   |   8 +-
 .../core/statusmanager/LoadMetadataDetails.java |   1 +
 .../SegmentUpdateStatusManager.java |   8 +-
 .../apache/carbondata/core/util/CarbonUtil.java |   6 +-
 .../core/util/path/CarbonTablePath.java |  55 ---
 .../CarbonFormatDirectoryStructureTest.java |   4 +-
 .../hadoop/api/CarbonTableInputFormat.java  |   2 +-
 .../streaming/CarbonStreamRecordWriter.java |   2 +-
 .../hadoop/test/util/StoreCreator.java  |   1 -
 .../presto/util/CarbonDataStoreCreator.scala|   1 -
 .../dataload/TestLoadDataGeneral.scala  |   2 +-
 .../InsertIntoCarbonTableTestCase.scala |   4 +-
 .../dataload/TestBatchSortDataLoad.scala|   3 +-
 .../dataload/TestDataLoadWithFileName.scala |   2 +-
 .../dataload/TestGlobalSortDataLoad.scala   |   4 +-
 .../testsuite/datamap/TestDataMapCommand.scala  |  34 ++--
 .../TestDataLoadingForPartitionTable.scala  |   3 +-
 .../load/DataLoadProcessBuilderOnSpark.scala|   1 -
 .../load/DataLoadProcessorStepOnSpark.scala |   2 +-
 .../spark/rdd/AlterTableLoadPartitionRDD.scala  | 154 +++
 .../carbondata/spark/rdd/CarbonMergerRDD.scala  |  11 +-
 .../spark/rdd/NewCarbonDataLoadRDD.scala|  25 ++-
 .../org/apache/spark/util/PartitionUtils.scala  |   5 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|   5 +-
 .../management/CarbonLoadDataCommand.scala  |   1 -
 .../datasources/CarbonFileFormat.scala  |   1 -
 .../partition/TestAlterPartitionTable.scala |   2 +-
 .../bucketing/TableBucketingTestCase.scala  |   2 +
 .../loading/CarbonDataLoadConfiguration.java|  10 --
 .../loading/DataLoadProcessBuilder.java |   1 -
 .../loading/TableProcessingOperations.java  |   3 +-
 .../loading/model/CarbonLoadModel.java  |  73 +
 .../sort/impl/ParallelReadMergeSorterImpl.java  |   4 +-
 ...arallelReadMergeSorterWithBucketingImpl.java |  15 +-
 .../UnsafeBatchParallelReadMergeSorterImpl.java |   7 +-
 ...arallelReadMergeSorterWithBucketingImpl.java |  21 ++-
 .../CarbonRowDataWriterProcessorStepImpl.java   |  33 ++--
 .../steps/DataWriterBatchProcessorStepImpl.java |  25 +--
 .../steps/DataWriterProcessorStepImpl.java  |  22 +--
 .../processing/merger/CarbonDataMergerUtil.java |   6 +-
 .../merger/CompactionResultSortProcessor.java   |   8 +-
 .../merger/RowResultMergerProcessor.java|   2 +-
 .../partition/spliter/RowResultProcessor.java   |   2 +-
 .../sort/sortdata/SortParameters.java   |  16 +-
 .../store/CarbonFactDataHandlerModel.java   |   3 +-
 .../util/CarbonDataProcessorUtil.java   |  20 +--
 .../processing/util/CarbonLoaderUtil.java   |  12 +-
 .../processing/util/DeleteLoadFolders.java  |   7 +-
 .../carbondata/processing/StoreCreator.java |   1 -
 .../carbondata/streaming/StreamHandoffRDD.scala |   1 -
 .../streaming/StreamSinkFactory.scala   |   2 +-
 .../streaming/CarbonAppendableStreamSink.scala  |   8 +-
 53 files changed, 285 insertions(+), 366 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/5663e916/core/src/main/java/org/apache/carbondata/core/metadata/PartitionMapFileStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/PartitionMapFileStore.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/PartitionMapFileStore.java
new file mode 100644
index 000..e69de29

http://git-wip-us.apache.org/repos/asf/carbondata/blob/5663e916/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java 
b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
index de98fa8..18eae11 100644
--- a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
+++ 

[08/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
 
b/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
index 94a041a..b74c279 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
@@ -378,7 +378,6 @@ public abstract class AbstractDataFileFooterConverter {
   cardinality[i] = segmentInfo.getColumn_cardinalities().get(i);
 }
 info.setColumnCardinality(cardinality);
-info.setNumberOfColumns(segmentInfo.getNum_cols());
 return info;
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 52305bd..0cc783e 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -48,10 +48,10 @@ import 
org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentif
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.constants.CarbonLoadOptionConstants;
 import org.apache.carbondata.core.datamap.Segment;
-import org.apache.carbondata.core.datastore.FileHolder;
+import org.apache.carbondata.core.datastore.FileReader;
 import org.apache.carbondata.core.datastore.block.AbstractIndex;
 import org.apache.carbondata.core.datastore.block.TableBlockInfo;
-import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
+import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
 import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
 import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
 import org.apache.carbondata.core.datastore.columnar.ColumnGroupModel;
@@ -82,7 +82,7 @@ import 
org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
 import org.apache.carbondata.core.mutate.UpdateVO;
 import org.apache.carbondata.core.reader.ThriftReader;
 import org.apache.carbondata.core.reader.ThriftReader.TBaseCreator;
-import org.apache.carbondata.core.scan.model.QueryDimension;
+import org.apache.carbondata.core.scan.model.ProjectionDimension;
 import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
 import org.apache.carbondata.core.statusmanager.SegmentStatus;
 import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
@@ -248,16 +248,13 @@ public final class CarbonUtil {
   public static ColumnGroupModel getColGroupModel(int[][] columnGroups) {
 int[] columnSplit = new int[columnGroups.length];
 int noOfColumnStore = columnSplit.length;
-boolean[] columnarStore = new boolean[noOfColumnStore];
 
 for (int i = 0; i < columnGroups.length; i++) {
   columnSplit[i] = columnGroups[i].length;
-  columnarStore[i] = columnGroups[i].length <= 1;
 }
 ColumnGroupModel colGroupModel = new ColumnGroupModel();
 colGroupModel.setNoOfColumnStore(noOfColumnStore);
 colGroupModel.setColumnSplit(columnSplit);
-colGroupModel.setColumnarStore(columnarStore);
 colGroupModel.setColumnGroup(columnGroups);
 return colGroupModel;
   }
@@ -418,7 +415,7 @@ public final class CarbonUtil {
 }
   }
 
-  public static int getFirstIndexUsingBinarySearch(DimensionColumnDataChunk 
dimColumnDataChunk,
+  public static int getFirstIndexUsingBinarySearch(DimensionColumnPage 
dimColumnDataChunk,
   int low, int high, byte[] compareValue, boolean matchUpLimit) {
 int cmpResult = 0;
 while (high >= low) {
@@ -457,7 +454,7 @@ public final class CarbonUtil {
* @return the compareValue's range index in the dimColumnDataChunk
*/
   public static int[] getRangeIndexUsingBinarySearch(
-  DimensionColumnDataChunk dimColumnDataChunk, int low, int high, byte[] 
compareValue) {
+  DimensionColumnPage dimColumnDataChunk, int low, int high, byte[] 
compareValue) {
 
 int[] rangeIndex = new int[2];
 int cmpResult = 0;
@@ -551,7 +548,7 @@ public final class CarbonUtil {
* @return index value
*/
   public static int nextLesserValueToTarget(int currentIndex,
-  DimensionColumnDataChunk dimColumnDataChunk, byte[] compareValue) {
+  DimensionColumnPage dimColumnDataChunk, byte[] compareValue) {
 while (currentIndex - 1 >= 0
 && dimColumnDataChunk.compareTo(currentIndex - 1, compareValue) >= 0) {
   --currentIndex;
@@ -571,7 +568,7 @@ 

[20/50] [abbrv] carbondata git commit: [CARBONDATA-2025] Unify all path construction through CarbonTablePath static method

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f06824e9/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
--
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
index 40b5cfc..753e637 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
@@ -34,7 +34,7 @@ import 
org.apache.carbondata.core.metadata.CarbonTableIdentifier
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable
 import org.apache.carbondata.core.statusmanager.SegmentStatusManager
 import org.apache.carbondata.core.util.CarbonUtil
-import org.apache.carbondata.core.util.path.CarbonStorePath
+import org.apache.carbondata.core.util.path.CarbonTablePath
 import org.apache.carbondata.events.{AlterTableRenamePostEvent, 
AlterTableRenamePreEvent, OperationContext, OperationListenerBus}
 import org.apache.carbondata.format.SchemaEvolutionEntry
 import org.apache.carbondata.spark.exception.{ConcurrentOperationException, 
MalformedCarbonCommandException}
@@ -97,8 +97,7 @@ private[sql] case class CarbonAlterTableRenameCommand(
   val oldTableIdentifier = carbonTable.getAbsoluteTableIdentifier
   DataMapStoreManager.getInstance().clearDataMaps(oldTableIdentifier)
   // get the latest carbon table and check for column existence
-  val oldTablePath = CarbonStorePath.getCarbonTablePath(oldTableIdentifier)
-  val tableMetadataFile = oldTablePath.getPath
+  val tableMetadataFile = oldTableIdentifier.getTablePath
   val operationContext = new OperationContext
   // TODO: Pass new Table Path in pre-event.
   val alterTableRenamePreEvent: AlterTableRenamePreEvent = 
AlterTableRenamePreEvent(
@@ -108,7 +107,7 @@ private[sql] case class CarbonAlterTableRenameCommand(
 sparkSession)
   OperationListenerBus.getInstance().fireEvent(alterTableRenamePreEvent, 
operationContext)
   val tableInfo: org.apache.carbondata.format.TableInfo =
-metastore.getThriftTableInfo(oldTablePath)(sparkSession)
+metastore.getThriftTableInfo(carbonTable)(sparkSession)
   val schemaEvolutionEntry = new 
SchemaEvolutionEntry(System.currentTimeMillis)
   schemaEvolutionEntry.setTableName(newTableName)
   timeStamp = System.currentTimeMillis()
@@ -117,7 +116,8 @@ private[sql] case class CarbonAlterTableRenameCommand(
   val fileType = FileFactory.getFileType(tableMetadataFile)
   val newTableIdentifier = new CarbonTableIdentifier(oldDatabaseName,
 newTableName, carbonTable.getCarbonTableIdentifier.getTableId)
-  var newTablePath = CarbonUtil.getNewTablePath(oldTablePath, 
newTableIdentifier.getTableName)
+  var newTablePath = CarbonTablePath.getNewTablePath(
+oldTableIdentifier.getTablePath, newTableIdentifier.getTableName)
   metastore.removeTableFromMetadata(oldDatabaseName, oldTableName)
   val hiveClient = 
sparkSession.sessionState.catalog.asInstanceOf[CarbonSessionCatalog]
 .getClient()
@@ -139,9 +139,9 @@ private[sql] case class CarbonAlterTableRenameCommand(
   // changed the rename order to deal with situation when carbon table and 
hive table
   // will point to the same tablePath
   if (FileFactory.isFileExist(tableMetadataFile, fileType)) {
-val rename = FileFactory.getCarbonFile(oldTablePath.getPath, fileType)
-  .renameForce(oldTablePath.getParent.toString + 
CarbonCommonConstants.FILE_SEPARATOR +
-   newTableName)
+val rename = 
FileFactory.getCarbonFile(oldTableIdentifier.getTablePath, fileType)
+  .renameForce(
+CarbonTablePath.getNewTablePath(oldTableIdentifier.getTablePath, 
newTableName))
 if (!rename) {
   renameBadRecords(newTableName, oldTableName, oldDatabaseName)
   sys.error(s"Folder rename failed for table 
$oldDatabaseName.$oldTableName")
@@ -149,7 +149,7 @@ private[sql] case class CarbonAlterTableRenameCommand(
   }
   val updatedParts = updatePartitionLocations(
 partitions,
-oldTablePath.getPath,
+oldTableIdentifier.getTablePath,
 newTablePath,
 sparkSession)
 
@@ -191,13 +191,11 @@ private[sql] case class CarbonAlterTableRenameCommand(
   case e: Exception =>
 LOGGER.error(e, "Rename table failed: " + e.getMessage)
 if (carbonTable != null) {
-  AlterTableUtil
-.revertRenameTableChanges(oldTableIdentifier,
-  newTableName,
-  carbonTable.getTablePath,
-  

[30/50] [abbrv] carbondata git commit: [CARBONDATA-1480]Min Max Index Example for DataMap

2018-03-04 Thread jackylk
[CARBONDATA-1480]Min Max Index Example for DataMap

DataMap example: an implementation of a Min Max index through the DataMap
interface, and use of the index during pruning.

This closes #1359


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f9d15a21
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f9d15a21
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f9d15a21

Branch: refs/heads/carbonstore
Commit: f9d15a215adc91077f1a6ca6a456e5fce4bc05eb
Parents: dfbdf3d
Author: sounakr 
Authored: Thu Sep 28 16:21:05 2017 +0530
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:12 2018 +0800

--
 .../core/datamap/DataMapStoreManager.java   |  16 +-
 .../carbondata/core/datamap/TableDataMap.java   |  18 +-
 .../carbondata/core/datamap/dev/DataMap.java|  11 +-
 .../core/datamap/dev/DataMapWriter.java |   3 +-
 .../indexstore/SegmentPropertiesFetcher.java|  36 +++
 .../blockletindex/BlockletDataMap.java  |   5 +-
 .../blockletindex/BlockletDataMapFactory.java   |  32 ++-
 datamap/examples/pom.xml| 111 ++
 .../datamap/examples/BlockletMinMax.java|  41 
 .../datamap/examples/MinMaxDataMap.java | 143 
 .../datamap/examples/MinMaxDataMapFactory.java  | 114 ++
 .../datamap/examples/MinMaxDataWriter.java  | 221 +++
 .../examples/MinMaxIndexBlockDetails.java   |  77 +++
 .../MinMaxDataMapExample.scala  |  77 +++
 .../testsuite/datamap/DataMapWriterSuite.scala  |   2 +-
 pom.xml |   2 +
 .../datamap/DataMapWriterListener.java  |   4 +-
 .../store/writer/AbstractFactDataWriter.java|   7 +-
 .../writer/v3/CarbonFactDataWriterImplV3.java   |   3 +
 19 files changed, 894 insertions(+), 29 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/f9d15a21/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
index d30483a..90e5fff 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
@@ -26,6 +26,7 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.datamap.dev.DataMapFactory;
 import org.apache.carbondata.core.indexstore.BlockletDetailsFetcher;
+import org.apache.carbondata.core.indexstore.SegmentPropertiesFetcher;
 import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap;
 import 
org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
@@ -103,7 +104,7 @@ public final class DataMapStoreManager {
   tableDataMaps = new ArrayList<>();
 }
 TableDataMap dataMap = getTableDataMap(dataMapName, tableDataMaps);
-if (dataMap != null) {
+if (dataMap != null && 
dataMap.getDataMapName().equalsIgnoreCase(dataMapName)) {
   throw new RuntimeException("Already datamap exists in that path with 
type " + dataMapName);
 }
 
@@ -113,12 +114,15 @@ public final class DataMapStoreManager {
   DataMapFactory dataMapFactory = factoryClass.newInstance();
   dataMapFactory.init(identifier, dataMapName);
   BlockletDetailsFetcher blockletDetailsFetcher;
+  SegmentPropertiesFetcher segmentPropertiesFetcher = null;
   if (dataMapFactory instanceof BlockletDetailsFetcher) {
 blockletDetailsFetcher = (BlockletDetailsFetcher) dataMapFactory;
   } else {
 blockletDetailsFetcher = getBlockletDetailsFetcher(identifier);
   }
-  dataMap = new TableDataMap(identifier, dataMapName, dataMapFactory, 
blockletDetailsFetcher);
+  segmentPropertiesFetcher = (SegmentPropertiesFetcher) 
blockletDetailsFetcher;
+  dataMap = new TableDataMap(identifier, dataMapName, dataMapFactory, 
blockletDetailsFetcher,
+  segmentPropertiesFetcher);
 } catch (Exception e) {
   LOGGER.error(e);
   throw new RuntimeException(e);
@@ -128,11 +132,11 @@ public final class DataMapStoreManager {
 return dataMap;
   }
 
-  private TableDataMap getTableDataMap(String dataMapName,
-  List tableDataMaps) {
+  private TableDataMap getTableDataMap(String dataMapName, List 
tableDataMaps) {
 TableDataMap dataMap = null;
-for (TableDataMap tableDataMap: tableDataMaps) {
-  if 

[19/50] [abbrv] carbondata git commit: [CARBONDATA-2025] Unify all path construction through CarbonTablePath static method

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f06824e9/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
--
diff --git 
a/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
 
b/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
index cd1e28a..d30891a 100644
--- 
a/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
+++ 
b/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
@@ -63,30 +63,6 @@ public class BlockIndexStoreTest extends TestCase {
 
   }
 
-//  public void testLoadAndGetTaskIdToSegmentsMapForSingleSegment()
-//  throws IOException {
-//File file = getPartFile();
-//TableBlockInfo info =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//CarbonTableIdentifier carbonTableIdentifier =
-//new 
CarbonTableIdentifier(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "t3", "1");
-//AbsoluteTableIdentifier absoluteTableIdentifier =
-//new AbsoluteTableIdentifier("/src/test/resources", 
carbonTableIdentifier);
-//try {
-//
-//  List tableBlockInfoList =
-//  getTableBlockUniqueIdentifierList(Arrays.asList(new 
TableBlockInfo[] { info }), absoluteTableIdentifier);
-//  List loadAndGetBlocks = 
cache.getAll(tableBlockInfoList);
-//  assertTrue(loadAndGetBlocks.size() == 1);
-//} catch (Exception e) {
-//  assertTrue(false);
-//}
-//List segmentIds = new ArrayList<>();
-//  segmentIds.add(info.getSegment());
-//cache.removeTableBlocks(segmentIds, absoluteTableIdentifier);
-//  }
-//
   private List 
getTableBlockUniqueIdentifierList(List tableBlockInfos,
   AbsoluteTableIdentifier absoluteTableIdentifier) {
 List tableBlockUniqueIdentifiers = new 
ArrayList<>();
@@ -95,138 +71,6 @@ public class BlockIndexStoreTest extends TestCase {
 }
 return tableBlockUniqueIdentifiers;
   }
-//
-//  public void 
testloadAndGetTaskIdToSegmentsMapForSameBlockLoadedConcurrently()
-//  throws IOException {
-//String canonicalPath =
-//new File(this.getClass().getResource("/").getPath() + 
"/../../").getCanonicalPath();
-//File file = getPartFile();
-//TableBlockInfo info =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//TableBlockInfo info1 =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//
-//TableBlockInfo info2 =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "1", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//TableBlockInfo info3 =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "1", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//TableBlockInfo info4 =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "1", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//
-//CarbonTableIdentifier carbonTableIdentifier =
-//new 
CarbonTableIdentifier(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "t3", "1");
-//AbsoluteTableIdentifier absoluteTableIdentifier =
-//new AbsoluteTableIdentifier("/src/test/resources", 
carbonTableIdentifier);
-//ExecutorService executor = Executors.newFixedThreadPool(3);
-//executor.submit(new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] 
{ info, info1 }),
-//absoluteTableIdentifier));
-//executor.submit(
-//new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] { info2, 
info3, info4 }),
-//absoluteTableIdentifier));
-//executor.submit(new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] 
{ info, info1 }),
-//absoluteTableIdentifier));
-//executor.submit(
-//new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] { info2, 
info3, info4 }),
-//absoluteTableIdentifier));
-//executor.shutdown();
-//try {
-//  executor.awaitTermination(1, TimeUnit.DAYS);
-//} catch (InterruptedException e) {
-//  e.printStackTrace();
-//}
-//List tableBlockInfos =
-//Arrays.asList(new TableBlockInfo[] { info, info1, info2, info3, 
info4 });
-//try {
-//  List tableBlockUniqueIdentifiers =
-//  getTableBlockUniqueIdentifierList(tableBlockInfos, 
absoluteTableIdentifier);
-//  List loadAndGetBlocks = 
cache.getAll(tableBlockUniqueIdentifiers);
-//  assertTrue(loadAndGetBlocks.size() == 5);
-//} catch (Exception e) {
-//  assertTrue(false);
-//}
-//List 

[25/50] [abbrv] carbondata git commit: [CARBONDATA-2023][DataLoad] Add size base block allocation in data loading

2018-03-04 Thread jackylk
[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading

Carbondata assigns blocks to nodes at the beginning of data loading.
The previous block allocation strategy was based on block count, and it
suffers from skewed-data problems when the sizes of the input files differ a lot.

We introduced a size-based block allocation strategy to optimize data
loading performance in skewed-data scenarios.

This closes #1808


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3fdd5d0f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3fdd5d0f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3fdd5d0f

Branch: refs/heads/carbonstore
Commit: 3fdd5d0f567e8d07cc502202ced7d490fa85e2ad
Parents: 0bb4aed
Author: xuchuanyin 
Authored: Thu Feb 8 14:42:39 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:12 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 +
 .../core/datastore/block/TableBlockInfo.java|  29 ++
 .../carbondata/core/util/CarbonProperties.java  |  11 +
 docs/useful-tips-on-carbondata.md   |   1 +
 .../CarbonIndexFileMergeTestCase.scala  |   4 -
 .../StandardPartitionTableLoadingTestCase.scala |   2 +-
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 ++
 .../processing/util/CarbonLoaderUtil.java   | 494 ---
 .../processing/util/CarbonLoaderUtilTest.java   | 125 +
 12 files changed, 552 insertions(+), 188 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/3fdd5d0f/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index bcfeba0..a6bf60f 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,4 +114,14 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
+  /**
+   * enable block size based block allocation while loading data. By default, 
carbondata assigns
+   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
+   * consider block size first and make sure that all the nodes will process 
almost equal size of
+   * data. This option is especially useful when you encounter skewed data.
+   */
+  @CarbonProperty
+  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
+  = "carbon.load.skewedDataOptimization.enabled";
+  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3fdd5d0f/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index a7bfdba..c0cebe0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,6 +19,8 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -98,6 +100,20 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
+  /**
+   * comparator to sort by block size in descending order.
+   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
+   * so we allow some deviation for these splits.
+   */
+  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
+  new Comparator() {
+@Override public int compare(Distributable o1, Distributable o2) {
+  long diff =
+  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
+  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
+}
+  };
+
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, 

[22/50] [abbrv] carbondata git commit: [CARBONDATA-2025] Unify all path construction through CarbonTablePath static method

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f06824e9/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java 
b/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
index 9799ac2..b7b5e43 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
@@ -22,16 +22,14 @@ import 
org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
 import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
-import org.apache.carbondata.core.metadata.CarbonTableIdentifier;
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
 
 import org.apache.hadoop.fs.Path;
 
-
 /**
  * Helps to get Table content paths.
  */
-public class CarbonTablePath extends Path {
+public class CarbonTablePath {
 
   private static final String METADATA_DIR = "Metadata";
   private static final String DICTIONARY_EXT = ".dict";
@@ -54,19 +52,10 @@ public class CarbonTablePath extends Path {
   private static final String STREAMING_LOG_DIR = "log";
   private static final String STREAMING_CHECKPOINT_DIR = "checkpoint";
 
-  private String tablePath;
-  private CarbonTableIdentifier carbonTableIdentifier;
-
   /**
-   * structure CarbonTablePath object to manage table paths
-   *
-   * @param carbonTableIdentifier identifier of carbon table that the segment 
belong to
-   * @param tablePathString the store path of the segment
+   * This class provides static utility only.
*/
-  public CarbonTablePath(CarbonTableIdentifier carbonTableIdentifier, String 
tablePathString) {
-super(tablePathString);
-this.carbonTableIdentifier = carbonTableIdentifier;
-this.tablePath = tablePathString;
+  private CarbonTablePath() {
   }
 
   /**
@@ -130,40 +119,21 @@ public class CarbonTablePath extends Path {
   }
 
   /**
-   * gets table path
+   * Return absolute path of dictionary file
*/
-  public String getPath() {
-return tablePath;
+  public static String getDictionaryFilePath(String tablePath, String 
columnId) {
+return getMetadataPath(tablePath) + File.separator + 
getDictionaryFileName(columnId);
   }
 
   /**
-   * @param columnId unique column identifier
-   * @return absolute path of dictionary file
+   * Return absolute path of dictionary file
*/
-  public String getDictionaryFilePath(String columnId) {
-return getMetaDataDir() + File.separator + getDictionaryFileName(columnId);
-  }
-
-  /**
-   * @param dictionaryPath
-   * @param columnId unique column identifier
-   * @return absolute path of dictionary file
-   */
-  public String getDictionaryFilePath(String dictionaryPath, String columnId) {
+  public static String getExternalDictionaryFilePath(String dictionaryPath, 
String columnId) {
 return dictionaryPath + File.separator + getDictionaryFileName(columnId);
   }
 
   /**
-   * This method will return the metadata directory location for a table
-   *
-   * @return
-   */
-  public String getMetadataDirectoryPath() {
-return getMetaDataDir();
-  }
-
-  /**
-   * Return metadata path based on `tablePath`
+   * Return metadata path
*/
   public static String getMetadataPath(String tablePath) {
 return tablePath + File.separator + METADATA_DIR;
@@ -184,67 +154,42 @@ public class CarbonTablePath extends Path {
   }
 
   /**
-   * @param columnId unique column identifier
-   * @return absolute path of dictionary meta file
+   * Return absolute path of dictionary meta file
*/
-  public String getDictionaryMetaFilePath(String columnId) {
-return getMetaDataDir() + File.separator + columnId + DICTIONARY_META_EXT;
-  }
-
-  /**
-   * @param dictionaryPath
-   * @param columnId unique column identifier
-   * @return absolute path of dictionary file
-   */
-  public String getDictionaryMetaFilePath(String dictionaryPath, String 
columnId) {
+  public static String getExternalDictionaryMetaFilePath(String 
dictionaryPath, String columnId) {
 return dictionaryPath + File.separator + columnId + DICTIONARY_META_EXT;
   }
 
   /**
-   * @param columnId unique column identifier
-   * @return absolute path of sort index file
+   * Return absolute path of dictionary meta file
*/
-  public String getSortIndexFilePath(String columnId) {
-return getMetaDataDir() + File.separator + columnId + SORT_INDEX_EXT;
+  public static String getDictionaryMetaFilePath(String tablePath, String 
columnId) {
+return getMetadataPath(tablePath) + File.separator + columnId + 
DICTIONARY_META_EXT;
   }
 
   /**
-   * @param dictionaryPath
-   * @param columnId unique column identifier
-   * @return absolute path of dictionary 

[35/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-03-04 Thread jackylk
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d85e916
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d85e916
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d85e916

Branch: refs/heads/carbonstore
Commit: 1d85e916f6a0f070960555fb18ee4cd8acbfa315
Parents: 6216294
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index c0cebe0..a7bfdba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb = new StringBuilder("TableBlockInfo{");
-

[33/50] [abbrv] carbondata git commit: [HotFix][CheckStyle] Fix import related checkstyle

2018-03-04 Thread jackylk
[HotFix][CheckStyle] Fix import related checkstyle

This closes #1952


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d88d5bb9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d88d5bb9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d88d5bb9

Branch: refs/heads/carbonstore
Commit: d88d5bb940f0fea6e5c8560fc5c8ea3724b95a28
Parents: bb5bb00
Author: xuchuanyin 
Authored: Thu Feb 8 15:39:45 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:12 2018 +0800

--
 .../core/indexstore/blockletindex/BlockletDataRefNode.java | 2 +-
 .../org/apache/carbondata/core/memory/HeapMemoryAllocator.java | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d88d5bb9/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
index b8fd6ff..50862a7 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
@@ -33,8 +33,8 @@ import 
org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReade
 import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
 import org.apache.carbondata.core.indexstore.FineGrainBlocklet;
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
-import org.apache.carbondata.core.util.BitSetGroup;
 import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
+import org.apache.carbondata.core.util.BitSetGroup;
 
 /**
  * wrapper for blocklet data map data

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d88d5bb9/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java 
b/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java
index 53cbb1d..242995b 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java
@@ -17,11 +17,11 @@
 
 package org.apache.carbondata.core.memory;
 
-import javax.annotation.concurrent.GuardedBy;
 import java.lang.ref.WeakReference;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.Map;
+import javax.annotation.concurrent.GuardedBy;
 
 import org.apache.carbondata.core.util.CarbonProperties;
 



[23/50] [abbrv] carbondata git commit: [CARBONDATA-2025] Unify all path construction through CarbonTablePath static method

2018-03-04 Thread jackylk
[CARBONDATA-2025] Unify all path construction through CarbonTablePath static 
method

Refactor CarbonTablePath:

1. Remove CarbonStorePath and use CarbonTablePath only.
2. Make CarbonTablePath a utility class that is never instantiated; this avoids
creating an object before each use, so the code is cleaner and GC pressure is lower.

This closes #1768


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f06824e9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f06824e9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f06824e9

Branch: refs/heads/carbonstore
Commit: f06824e9744a831776b1203c94d4001eef870b14
Parents: 92c9f22
Author: Jacky Li 
Authored: Wed Jan 31 16:14:27 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:00 2018 +0800

--
 .../DictionaryColumnUniqueIdentifier.java   |  29 +-
 .../dictionary/ManageDictionaryAndBTree.java|  13 +-
 .../core/metadata/AbsoluteTableIdentifier.java  |   4 +-
 .../core/metadata/SegmentFileStore.java |   3 +-
 .../core/metadata/schema/table/CarbonTable.java |  11 +-
 .../core/mutate/CarbonUpdateUtil.java   |  58 ++--
 .../core/scan/executor/util/QueryUtil.java  |   7 +-
 .../scan/executor/util/RestructureUtil.java |   6 +-
 .../scan/filter/FilterExpressionProcessor.java  |   2 +-
 .../filter/executer/FalseFilterExecutor.java|   8 +-
 .../RowLevelRangeGrtThanFiterExecuterImpl.java  |   1 +
 ...elRangeGrtrThanEquaToFilterExecuterImpl.java |   1 +
 ...velRangeLessThanEqualFilterExecuterImpl.java |   1 +
 .../RowLevelRangeLessThanFiterExecuterImpl.java |   1 +
 .../FalseConditionalResolverImpl.java   |   4 +-
 .../core/service/CarbonCommonFactory.java   |  16 -
 .../carbondata/core/service/PathService.java|  35 ---
 .../core/service/impl/PathFactory.java  |  50 
 .../statusmanager/SegmentStatusManager.java |  31 +-
 .../SegmentUpdateStatusManager.java |  70 ++---
 .../apache/carbondata/core/util/CarbonUtil.java |  98 ++
 .../util/path/CarbonSharedDictionaryPath.java   |  71 -
 .../core/util/path/CarbonStorePath.java |  71 -
 .../core/util/path/CarbonTablePath.java | 298 ++-
 .../dictionary/AbstractDictionaryCacheTest.java |  11 +-
 .../dictionary/ForwardDictionaryCacheTest.java  |   6 +-
 .../dictionary/ReverseDictionaryCacheTest.java  |   6 +-
 .../reader/CarbonDictionaryReaderImplTest.java  |   8 -
 .../CarbonFormatDirectoryStructureTest.java |  18 +-
 .../path/CarbonFormatSharedDictionaryTest.java  |  44 ---
 .../writer/CarbonDictionaryWriterImplTest.java  |  19 +-
 .../CarbonBatchSparkStreamingExample.scala  |   9 +-
 .../CarbonStreamSparkStreamingExample.scala |  10 +-
 .../CarbonStructuredStreamingExample.scala  |  11 +-
 ...CarbonStructuredStreamingWithRowParser.scala |   9 +-
 .../hadoop/api/CarbonTableInputFormat.java  |   8 +-
 .../streaming/CarbonStreamRecordWriter.java |   6 +-
 .../carbondata/hadoop/util/SchemaReader.java|  18 +-
 .../hadoop/test/util/StoreCreator.java  |  16 +-
 .../presto/CarbondataRecordSetProvider.java |   7 +-
 .../presto/impl/CarbonTableCacheModel.java  |  13 +-
 .../presto/impl/CarbonTableReader.java  |  49 ++-
 .../presto/util/CarbonDataStoreCreator.scala|  10 +-
 .../sdv/generated/MergeIndexTestCase.scala  |  11 +-
 .../dataload/TestLoadDataGeneral.scala  |  11 +-
 .../InsertIntoCarbonTableTestCase.scala |   8 +-
 .../createTable/TestCreateTableAsSelect.scala   |   2 +-
 .../datacompaction/DataCompactionLockTest.scala |   6 +-
 .../MajorCompactionIgnoreInMinorTest.scala  |  12 +-
 .../dataload/TestBatchSortDataLoad.scala|   5 +-
 .../dataload/TestDataLoadWithFileName.scala |   5 +-
 .../dataload/TestGlobalSortDataLoad.scala   |   8 +-
 .../dataretention/DataRetentionTestCase.scala   |   6 +-
 .../TestDataLoadingForPartitionTable.scala  |   4 +-
 .../StandardPartitionTableLoadingTestCase.scala |   6 +-
 .../org/apache/carbondata/api/CarbonStore.scala |   4 +-
 .../carbondata/spark/CarbonSparkFactory.scala   |   2 +-
 .../spark/DictionaryDetailHelper.scala  |   9 +-
 .../spark/rdd/AlterTableAddColumnRDD.scala  |  17 +-
 .../spark/rdd/CarbonGlobalDictionaryRDD.scala   |   6 -
 .../carbondata/spark/util/CommonUtil.scala  |  30 +-
 .../carbondata/spark/util/DataLoadingUtil.scala |  10 +-
 .../spark/util/GlobalDictionaryUtil.scala   |  16 +-
 .../command/carbonTableSchemaCommon.scala   |   9 +-
 .../org/apache/spark/util/PartitionUtils.scala  |   6 +-
 .../spark/rdd/AggregateDataMapCompactor.scala   |  19 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  14 +-
 .../spark/rdd/CarbonTableCompactor.scala|   8 +-
 .../CarbonAlterTableCompactionCommand.scala |  13 +-
 

[11/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
index 447ab46..547ecaa 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
@@ -22,7 +22,7 @@ import java.util.List;
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
-import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
+import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
 import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
 import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
@@ -35,12 +35,11 @@ import org.apache.carbondata.core.metadata.encoder.Encoding;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
 import org.apache.carbondata.core.scan.expression.Expression;
-import 
org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException;
 import org.apache.carbondata.core.scan.filter.FilterUtil;
 import org.apache.carbondata.core.scan.filter.intf.RowIntf;
 import 
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
 import 
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
-import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
+import org.apache.carbondata.core.scan.processor.RawBlockletColumnChunks;
 import org.apache.carbondata.core.util.BitSetGroup;
 import org.apache.carbondata.core.util.ByteUtil;
 import org.apache.carbondata.core.util.CarbonUtil;
@@ -73,7 +72,7 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends 
RowLevelFilterExecut
   comparator = 
Comparator.getComparatorByDataTypeForMeasure(measure.getDataType());
 }
 ifDefaultValueMatchesFilter();
-if (isDimensionPresentInCurrentBlock[0] == true) {
+if (isDimensionPresentInCurrentBlock[0]) {
   isNaturalSorted = 
dimColEvaluatorInfoList.get(0).getDimension().isUseInvertedIndex()
   && dimColEvaluatorInfoList.get(0).getDimension().isSortColumn();
 }
@@ -120,11 +119,11 @@ public class RowLevelRangeLessThanFiterExecuterImpl 
extends RowLevelFilterExecut
 boolean isScanRequired = false;
 if (isMeasurePresentInCurrentBlock[0] || 
isDimensionPresentInCurrentBlock[0]) {
   if (isMeasurePresentInCurrentBlock[0]) {
-minValue = blockMinValue[measureBlocksIndex[0] + 
lastDimensionColOrdinal];
+minValue = blockMinValue[measureChunkIndex[0] + 
lastDimensionColOrdinal];
 isScanRequired =
 isScanRequired(minValue, msrFilterRangeValues, 
msrColEvalutorInfoList.get(0).getType());
   } else {
-minValue = blockMinValue[dimensionBlocksIndex[0]];
+minValue = blockMinValue[dimensionChunkIndex[0]];
 isScanRequired = isScanRequired(minValue, filterRangeValues);
   }
 } else {
@@ -170,67 +169,69 @@ public class RowLevelRangeLessThanFiterExecuterImpl 
extends RowLevelFilterExecut
   }
 
   @Override
-  public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder, boolean 
useBitsetPipeLine)
-  throws FilterUnsupportedException, IOException {
+  public BitSetGroup applyFilter(RawBlockletColumnChunks 
rawBlockletColumnChunks,
+  boolean useBitsetPipeLine) throws IOException {
 // select all rows if dimension does not exists in the current block
 if (!isDimensionPresentInCurrentBlock[0] && 
!isMeasurePresentInCurrentBlock[0]) {
-  int numberOfRows = blockChunkHolder.getDataBlock().nodeSize();
+  int numberOfRows = rawBlockletColumnChunks.getDataBlock().numRows();
   return FilterUtil
-  
.createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(),
+  
.createBitSetGroupWithDefaultValue(rawBlockletColumnChunks.getDataBlock().numberOfPages(),
   numberOfRows, true);
 }
 if (isDimensionPresentInCurrentBlock[0]) {
-  int blockIndex =
-  
segmentProperties.getDimensionOrdinalToBlockMapping().get(dimensionBlocksIndex[0]);
-  if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) {
-blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = 
blockChunkHolder.getDataBlock()
-

[15/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
index 0cb2918..099fffd 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
@@ -29,31 +29,12 @@ public class SegmentInfo implements Serializable {
   private static final long serialVersionUID = -174987462709431L;
 
   /**
-   * number of column in the segment
-   */
-  private int numberOfColumns;
-
-  /**
* cardinality of each columns
* column which is not participating in the multidimensional key cardinality 
will be -1;
*/
   private int[] columnCardinality;
 
   /**
-   * @return the numberOfColumns
-   */
-  public int getNumberOfColumns() {
-return numberOfColumns;
-  }
-
-  /**
-   * @param numberOfColumns the numberOfColumns to set
-   */
-  public void setNumberOfColumns(int numberOfColumns) {
-this.numberOfColumns = numberOfColumns;
-  }
-
-  /**
* @return the columnCardinality
*/
   public int[] getColumnCardinality() {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index 6036569..d17d865 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -19,7 +19,13 @@ package org.apache.carbondata.core.metadata.schema.table;
 
 import java.io.IOException;
 import java.io.Serializable;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
@@ -33,7 +39,10 @@ import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
 import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonImplicitDimension;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
 import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
+import org.apache.carbondata.core.scan.model.QueryModel;
+import org.apache.carbondata.core.scan.model.QueryProjection;
 import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeConverter;
 import org.apache.carbondata.core.util.DataTypeUtil;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 
@@ -136,10 +145,7 @@ public class CarbonTable implements Serializable {
   /**
* During creation of TableInfo from hivemetastore the DataMapSchemas and 
the columns
* DataTypes are not converted to the appropriate child classes.
-   *
* This method will cast the same to the appropriate classes
-   *
-   * @param tableInfo
*/
   public static void updateTableInfo(TableInfo tableInfo) {
 List dataMapSchemas = new ArrayList<>();
@@ -153,8 +159,9 @@ public class CarbonTable implements Serializable {
 }
 tableInfo.setDataMapSchemaList(dataMapSchemas);
 for (ColumnSchema columnSchema : 
tableInfo.getFactTable().getListOfColumns()) {
-  columnSchema.setDataType(DataTypeUtil.valueOf(columnSchema.getDataType(),
-  columnSchema.getPrecision(), columnSchema.getScale()));
+  columnSchema.setDataType(
+  DataTypeUtil.valueOf(
+  columnSchema.getDataType(), columnSchema.getPrecision(), 
columnSchema.getScale()));
 }
 List childSchema = tableInfo.getDataMapSchemaList();
 for (DataMapSchema dataMapSchema : childSchema) {
@@ -168,10 +175,11 @@ public class CarbonTable implements Serializable {
   }
 }
 if (tableInfo.getFactTable().getBucketingInfo() != null) {
-  for (ColumnSchema columnSchema : tableInfo.getFactTable()
-  .getBucketingInfo().getListOfColumns()) {
-
columnSchema.setDataType(DataTypeUtil.valueOf(columnSchema.getDataType(),
-columnSchema.getPrecision(), columnSchema.getScale()));
+  for (ColumnSchema columnSchema :
+  tableInfo.getFactTable().getBucketingInfo().getListOfColumns()) {
+columnSchema.setDataType(
+DataTypeUtil.valueOf(
+columnSchema.getDataType(), columnSchema.getPrecision(), 

[09/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/PartitionSpliterRawResultIterator.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/PartitionSpliterRawResultIterator.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/PartitionSpliterRawResultIterator.java
index 553f85e..773fbd7 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/PartitionSpliterRawResultIterator.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/PartitionSpliterRawResultIterator.java
@@ -17,20 +17,15 @@
 package org.apache.carbondata.core.scan.result.iterator;
 
 import org.apache.carbondata.common.CarbonIterator;
-import org.apache.carbondata.common.logging.LogService;
-import org.apache.carbondata.common.logging.LogServiceFactory;
-import org.apache.carbondata.core.scan.result.BatchResult;
+import org.apache.carbondata.core.scan.result.RowBatch;
 
 public class PartitionSpliterRawResultIterator extends 
CarbonIterator {
 
-  private CarbonIterator iterator;
-  private BatchResult batch;
+  private CarbonIterator iterator;
+  private RowBatch batch;
   private int counter;
 
-  private static final LogService LOGGER =
-  
LogServiceFactory.getLogService(PartitionSpliterRawResultIterator.class.getName());
-
-  public PartitionSpliterRawResultIterator(CarbonIterator 
iterator) {
+  public PartitionSpliterRawResultIterator(CarbonIterator iterator) {
 this.iterator = iterator;
   }
 
@@ -65,7 +60,7 @@ public class PartitionSpliterRawResultIterator extends 
CarbonIterator
* @param batch
* @return
*/
-  private boolean checkBatchEnd(BatchResult batch) {
+  private boolean checkBatchEnd(RowBatch batch) {
 return !(counter < batch.getSize());
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
index 70d0958..1dd1595 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
@@ -21,7 +21,7 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.keygenerator.KeyGenException;
-import org.apache.carbondata.core.scan.result.BatchResult;
+import org.apache.carbondata.core.scan.result.RowBatch;
 import org.apache.carbondata.core.scan.wrappers.ByteArrayWrapper;
 
 /**
@@ -37,7 +37,7 @@ public class RawResultIterator extends 
CarbonIterator {
   /**
* Iterator of the Batch raw result.
*/
-  private CarbonIterator detailRawQueryResultIterator;
+  private CarbonIterator detailRawQueryResultIterator;
 
   /**
* Counter to maintain the row counter.
@@ -55,9 +55,9 @@ public class RawResultIterator extends 
CarbonIterator {
   /**
* batch of the result.
*/
-  private BatchResult batch;
+  private RowBatch batch;
 
-  public RawResultIterator(CarbonIterator 
detailRawQueryResultIterator,
+  public RawResultIterator(CarbonIterator 
detailRawQueryResultIterator,
   SegmentProperties sourceSegProperties, SegmentProperties 
destinationSegProperties) {
 this.detailRawQueryResultIterator = detailRawQueryResultIterator;
 this.sourceSegProperties = sourceSegProperties;
@@ -155,7 +155,7 @@ public class RawResultIterator extends 
CarbonIterator {
* @param batch
* @return
*/
-  private boolean checkIfBatchIsProcessedCompletely(BatchResult batch) {
+  private boolean checkIfBatchIsProcessedCompletely(RowBatch batch) {
 if (counter < batch.getSize()) {
   return false;
 } else {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/VectorDetailQueryResultIterator.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/VectorDetailQueryResultIterator.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/VectorDetailQueryResultIterator.java
index cc9710e..c7cb00d 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/VectorDetailQueryResultIterator.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/VectorDetailQueryResultIterator.java
@@ -35,10 +35,12 

[06/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionExecutor.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionExecutor.java
 
b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionExecutor.java
index f51ced3..6a401d8 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionExecutor.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionExecutor.java
@@ -34,20 +34,16 @@ import 
org.apache.carbondata.core.datastore.block.TableBlockInfo;
 import org.apache.carbondata.core.datastore.block.TaskBlockInfo;
 import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
 import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
 import org.apache.carbondata.core.scan.executor.QueryExecutor;
 import org.apache.carbondata.core.scan.executor.QueryExecutorFactory;
 import 
org.apache.carbondata.core.scan.executor.exception.QueryExecutionException;
-import org.apache.carbondata.core.scan.model.QueryDimension;
-import org.apache.carbondata.core.scan.model.QueryMeasure;
 import org.apache.carbondata.core.scan.model.QueryModel;
-import org.apache.carbondata.core.scan.result.BatchResult;
+import org.apache.carbondata.core.scan.result.RowBatch;
 import org.apache.carbondata.core.scan.result.iterator.RawResultIterator;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.core.util.DataTypeUtil;
+import org.apache.carbondata.core.util.DataTypeConverter;
 
 /**
  * Executor class for executing the query on the selected segments to be 
merged.
@@ -70,6 +66,9 @@ public class CarbonCompactionExecutor {
*/
   private boolean restructuredBlockExists;
 
+  // converter for UTF8String and decimal conversion
+  private DataTypeConverter dataTypeConverter;
+
   /**
* Constructor
*
@@ -82,13 +81,14 @@ public class CarbonCompactionExecutor {
   public CarbonCompactionExecutor(Map segmentMapping,
   SegmentProperties segmentProperties, CarbonTable carbonTable,
   Map dataFileMetadataSegMapping,
-  boolean restructuredBlockExists) {
+  boolean restructuredBlockExists, DataTypeConverter dataTypeConverter) {
 this.segmentMapping = segmentMapping;
 this.destinationSegProperties = segmentProperties;
 this.carbonTable = carbonTable;
 this.dataFileMetadataSegMapping = dataFileMetadataSegMapping;
 this.restructuredBlockExists = restructuredBlockExists;
-queryExecutorList = new 
ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+this.queryExecutorList = new 
ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+this.dataTypeConverter = dataTypeConverter;
   }
 
   /**
@@ -100,7 +100,9 @@ public class CarbonCompactionExecutor {
 List resultList =
 new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
 List list = null;
-queryModel = prepareQueryModel(list);
+queryModel = 
carbonTable.createQueryModelWithProjectAllColumns(dataTypeConverter);
+queryModel.setReadPageByPage(enablePageLevelReaderForCompaction());
+queryModel.setForcedDetailRawQuery(true);
 // iterate each seg ID
 for (Map.Entry taskMap : segmentMapping.entrySet()) 
{
   String segmentId = taskMap.getKey();
@@ -156,7 +158,7 @@ public class CarbonCompactionExecutor {
* @param blockList
* @return
*/
-  private CarbonIterator executeBlockList(List 
blockList)
+  private CarbonIterator executeBlockList(List 
blockList)
   throws QueryExecutionException, IOException {
 queryModel.setTableBlockInfos(blockList);
 QueryExecutor queryExecutor = 
QueryExecutorFactory.getQueryExecutor(queryModel);
@@ -195,48 +197,6 @@ public class CarbonCompactionExecutor {
   }
 
   /**
-   * Preparing of the query model.
-   *
-   * @param blockList
-   * @return
-   */
-  private QueryModel prepareQueryModel(List blockList) {
-QueryModel model = new QueryModel();
-model.setTableBlockInfos(blockList);
-model.setForcedDetailRawQuery(true);
-model.setFilterExpressionResolverTree(null);
-model.setConverter(DataTypeUtil.getDataTypeConverter());
-model.setReadPageByPage(enablePageLevelReaderForCompaction());
-
-List dims = new 
ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
-
-List dimensions =
-carbonTable.getDimensionByTableName(carbonTable.getTableName());
-for (CarbonDimension dim : dimensions) {
- 

[40/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row"

2018-03-04 Thread jackylk
Revert "[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort 
temp row"

This reverts commit de92ea9a123b17d903f2d1d4662299315c792954.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/46031a32
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/46031a32
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/46031a32

Branch: refs/heads/carbonstore
Commit: 46031a320506ceed10b2134710be6c630c6ee533
Parents: 1d85e91
Author: Jacky Li 
Authored: Sat Feb 10 20:11:25 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 ++-
 .../presto/util/CarbonDataStoreCreator.scala|   1 +
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 -
 .../loading/sort/SortStepRowHandler.java| 466 ---
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 +++--
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 ++-
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 +++
 .../UnsafeRowComparatorForNormalDims.java   |  59 ---
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 --
 .../merger/UnsafeIntermediateFileMerger.java| 118 -
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 +
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +++-
 .../IntermediateSortTempRowComparator.java  |  73 ---
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 +++
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +++-
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 +--
 .../sort/sortdata/TableFieldStat.java   | 176 ---
 28 files changed, 1294 insertions(+), 1186 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/46031a32/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index fca1244..d6ecfbc 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,26 +82,18 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required dictionary Dimension from obj []
+   * Method to get the required Dimension from obj []
*
* @param index
* @param row
* @return
*/
-  public static int getDictDimension(int index, Object[] row) {
-int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
+  public static Integer getDimension(int index, Object[] row) {
+
+Integer[] dimensions = (Integer[]) 
row[WriteStepRowUtil.DICTIONARY_DIMENSION];
+
 return dimensions[index];
-  }
 
-  /**
-   * Method to get the required non-dictionary & complex from 3-parted row
-   * @param index
-   * @param row
-   * @return
-   */
-  public static byte[] getNoDictOrComplex(int index, Object[] row) {
-byte[][] nonDictArray = (byte[][]) 
row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-return nonDictArray[index];
   }
 
   /**
@@ -116,11 +108,60 @@ public class NonDictionaryUtil {
 return measures[index];
   }
 
+  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
+
+return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+  }
+
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] 
byteBufferArr,
   Object[] measureArray) {
+
 out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
 out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
 out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
+
+  /**
+   * This method will extract the single dimension from the complete high card 
dims byte[].+ *
+   * The format of the byte [] will be,  Totallength,CompleteStartOffsets,Dat
+   *
+   * @param highCardArr
+   * @param index
+   * @param highCardinalityCount
+   * @param outBuffer
+   */
+  public static void extractSingleHighCardDims(byte[] highCardArr, 

[32/50] [abbrv] carbondata git commit: [CARBONDATA-1544][Datamap] Datamap FineGrain implementation

2018-03-04 Thread jackylk
[CARBONDATA-1544][Datamap] Datamap FineGrain implementation

Implemented interfaces for FG datamap and integrated to filterscanner to use 
the pruned bitset from FG datamap.
FG Query flow as follows.
1.The user can add FG datamap to any table and implement there interfaces.
2. Any filter query which hits the table with datamap will call prune method of 
FGdatamap.
3. The prune method of FGDatamap returns a list of FineGrainBlocklet; these 
blocklets contain the block, blocklet, page and rowid 
information as well.
4. The pruned blocklets are internally written to file and return only the 
block, blocklet and filepath information as part of Splits.
5. Based on the splits scanrdd schedule the tasks.
6. In filterscanner we check the datamapwriterpath from split and read the 
bitset if it exists, and pass this bitset as input to it.

This closes #1471


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bb5bb00a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bb5bb00a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bb5bb00a

Branch: refs/heads/carbonstore
Commit: bb5bb00af982831ea83c73a2c437aa4aea8a5422
Parents: f9d15a2
Author: ravipesala 
Authored: Wed Nov 15 19:48:40 2017 +0530
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:12 2018 +0800

--
 .../carbondata/core/datamap/DataMapMeta.java|   8 +-
 .../core/datamap/DataMapStoreManager.java   |  30 +-
 .../carbondata/core/datamap/DataMapType.java|  21 +
 .../carbondata/core/datamap/TableDataMap.java   |  33 +-
 .../core/datamap/dev/AbstractDataMapWriter.java | 110 +
 .../core/datamap/dev/BlockletSerializer.java|  57 +++
 .../carbondata/core/datamap/dev/DataMap.java|   5 +-
 .../core/datamap/dev/DataMapFactory.java|  14 +-
 .../core/datamap/dev/DataMapWriter.java |  57 ---
 .../cgdatamap/AbstractCoarseGrainDataMap.java   |  24 +
 .../AbstractCoarseGrainDataMapFactory.java  |  34 ++
 .../dev/fgdatamap/AbstractFineGrainDataMap.java |  24 +
 .../AbstractFineGrainDataMapFactory.java|  38 ++
 .../carbondata/core/datastore/DataRefNode.java  |   6 +
 .../core/datastore/block/TableBlockInfo.java|  10 +
 .../impl/btree/AbstractBTreeLeafNode.java   |   5 +
 .../datastore/impl/btree/BTreeNonLeafNode.java  |   5 +
 .../carbondata/core/indexstore/Blocklet.java|  30 +-
 .../indexstore/BlockletDataMapIndexStore.java   |   6 -
 .../core/indexstore/BlockletDetailsFetcher.java |   9 +
 .../core/indexstore/ExtendedBlocklet.java   |  17 +
 .../core/indexstore/FineGrainBlocklet.java  | 120 +
 .../indexstore/SegmentPropertiesFetcher.java|   3 +-
 .../blockletindex/BlockletDataMap.java  |  18 +-
 .../blockletindex/BlockletDataMapFactory.java   |  66 ++-
 .../blockletindex/BlockletDataRefNode.java  |  27 +-
 .../indexstore/blockletindex/IndexWrapper.java  |  18 +
 .../core/indexstore/schema/FilterType.java  |  24 -
 .../executer/ExcludeFilterExecuterImpl.java |   3 +
 .../executer/IncludeFilterExecuterImpl.java |   3 +
 .../scanner/impl/BlockletFilterScanner.java |   2 +
 .../apache/carbondata/core/util/CarbonUtil.java |  97 
 datamap/examples/pom.xml|   2 +-
 .../datamap/examples/MinMaxDataMap.java |  33 +-
 .../datamap/examples/MinMaxDataMapFactory.java  |  67 +--
 .../datamap/examples/MinMaxDataWriter.java  |  36 +-
 .../examples/MinMaxIndexBlockDetails.java   |  13 -
 .../carbondata/hadoop/CarbonInputSplit.java |  21 +-
 .../hadoop/api/CarbonTableInputFormat.java  |  17 +-
 .../testsuite/datamap/CGDataMapTestCase.scala   | 361 +++
 .../testsuite/datamap/DataMapWriterSuite.scala  |  43 +-
 .../testsuite/datamap/FGDataMapTestCase.scala   | 440 +++
 .../TestInsertAndOtherCommandConcurrent.scala   |  21 +-
 .../carbondata/spark/rdd/CarbonScanRDD.scala|   7 +-
 .../TestStreamingTableOperation.scala   |   5 +-
 .../datamap/DataMapWriterListener.java  |  57 ++-
 .../store/CarbonFactDataHandlerModel.java   |  10 +-
 .../store/writer/AbstractFactDataWriter.java| 128 +-
 48 files changed, 1784 insertions(+), 401 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/bb5bb00a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java
index 7746acf..dd15ccb 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java
@@ -19,15 

[24/50] [abbrv] carbondata git commit: [REBASE] Solve conflict after rebasing master

2018-03-04 Thread jackylk
[REBASE] Solve conflict after rebasing master


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0bb4aed6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0bb4aed6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0bb4aed6

Branch: refs/heads/carbonstore
Commit: 0bb4aed60a7b60eed49e0b5e618af269c0c03a73
Parents: 586ab70
Author: Jacky Li 
Authored: Fri Feb 9 01:39:20 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:12 2018 +0800

--
 .../filter/executer/FalseFilterExecutor.java| 10 +
 .../core/util/path/CarbonTablePath.java | 23 +++-
 .../CarbonAlterTableDataTypeChangeCommand.scala |  7 +++---
 .../schema/CarbonAlterTableRenameCommand.scala  |  3 ++-
 .../apache/spark/sql/hive/CarbonMetaStore.scala |  9 +---
 .../processing/util/CarbonLoaderUtil.java   |  4 ++--
 6 files changed, 28 insertions(+), 28 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bb4aed6/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FalseFilterExecutor.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FalseFilterExecutor.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FalseFilterExecutor.java
index 1c918bc..ee3b0fc 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FalseFilterExecutor.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FalseFilterExecutor.java
@@ -44,17 +44,19 @@ public class FalseFilterExecutor implements FilterExecuter {
 return group;
   }
 
-  @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax)
+  @Override
+  public boolean applyFilter(RowIntf value, int dimOrdinalMax)
   throws FilterUnsupportedException, IOException {
 return false;
   }
 
-  @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] 
blockMinValue) {
-
+  @Override
+  public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) 
{
 return new BitSet();
   }
 
-  @Override public void readColumnChunks(RawBlockletColumnChunks 
blockChunkHolder) {
+  @Override
+  public void readColumnChunks(RawBlockletColumnChunks blockChunkHolder) {
 // Do Nothing
   }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bb4aed6/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java 
b/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
index b7b5e43..50c5a31 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
@@ -36,13 +36,13 @@ public class CarbonTablePath {
   private static final String DICTIONARY_META_EXT = ".dictmeta";
   private static final String SORT_INDEX_EXT = ".sortindex";
   private static final String SCHEMA_FILE = "schema";
-  public static final String TABLE_STATUS_FILE = "tablestatus";
   private static final String FACT_DIR = "Fact";
   private static final String SEGMENT_PREFIX = "Segment_";
   private static final String PARTITION_PREFIX = "Part";
   private static final String DATA_PART_PREFIX = "part-";
   private static final String BATCH_PREFIX = "_batchno";
 
+  public static final String TABLE_STATUS_FILE = "tablestatus";
   public static final String CARBON_DATA_EXT = ".carbondata";
   public static final String INDEX_FILE_EXT = ".carbonindex";
   public static final String MERGE_INDEX_FILE_EXT = ".carbonindexmerge";
@@ -140,20 +140,6 @@ public class CarbonTablePath {
   }
 
   /**
-   * Return metadata path based on `tablePath`
-   */
-  public static String getTableStatusPath(String tablePath) {
-return getMetadataPath(tablePath) + File.separator + TABLE_STATUS_FILE;
-  }
-
-  /**
-   * Return table status file path based on `tablePath`
-   */
-  public static String getTableStatusFilePath(String tablePath) {
-return getMetadataPath(tablePath) + CarbonCommonConstants.FILE_SEPARATOR + 
TABLE_STATUS_FILE;
-  }
-
-  /**
* Return absolute path of dictionary meta file
*/
   public static String getExternalDictionaryMetaFilePath(String 
dictionaryPath, String columnId) {
@@ -213,6 +199,13 @@ public class CarbonTablePath {
 }
   }
 
+  /**
+   * Return absolute path of table status file
+   */
+  public static String getTableStatusFilePath(String tablePath) {
+return getMetadataPath(tablePath) + File.separator + TABLE_STATUS_FILE;
+  }
+
   public static 

[37/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-03-04 Thread jackylk
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

Pick up the no-sort fields in the row and pack them as a byte array, and skip 
parsing them during merge sort to reduce CPU consumption

This closes #1792


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/28b5720f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/28b5720f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/28b5720f

Branch: refs/heads/carbonstore
Commit: 28b5720fcf1cbd0d4bdf3f04e7b0edd8f9492a8d
Parents: dcfe73b
Author: xuchuanyin 
Authored: Thu Feb 8 14:35:14 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 +--
 .../presto/util/CarbonDataStoreCreator.scala|   1 -
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 +
 .../loading/sort/SortStepRowHandler.java| 466 +++
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++---
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 +--
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 ---
 .../UnsafeRowComparatorForNormalDims.java   |  59 +++
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 ++
 .../merger/UnsafeIntermediateFileMerger.java| 118 +
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 -
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +---
 .../IntermediateSortTempRowComparator.java  |  73 +++
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 ---
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +---
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 ++-
 .../sort/sortdata/TableFieldStat.java   | 176 +++
 28 files changed, 1186 insertions(+), 1294 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/28b5720f/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index d6ecfbc..fca1244 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,18 +82,26 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required Dimension from obj []
+   * Method to get the required dictionary Dimension from obj []
*
* @param index
* @param row
* @return
*/
-  public static Integer getDimension(int index, Object[] row) {
-
-Integer[] dimensions = (Integer[]) 
row[WriteStepRowUtil.DICTIONARY_DIMENSION];
-
+  public static int getDictDimension(int index, Object[] row) {
+int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
 return dimensions[index];
+  }
 
+  /**
+   * Method to get the required non-dictionary & complex from 3-parted row
+   * @param index
+   * @param row
+   * @return
+   */
+  public static byte[] getNoDictOrComplex(int index, Object[] row) {
+byte[][] nonDictArray = (byte[][]) 
row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+return nonDictArray[index];
   }
 
   /**
@@ -108,60 +116,11 @@ public class NonDictionaryUtil {
 return measures[index];
   }
 
-  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
-
-return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-  }
-
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] 
byteBufferArr,
   Object[] measureArray) {
-
 out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
 out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
 out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
-
-  /**
-   * This method will extract the single dimension from the complete high card 
dims byte[].+ *
-   * The format of the byte [] will be,  Totallength,CompleteStartOffsets,Dat
-   *
-   * @param highCardArr
-   * @param index
-   * @param highCardinalityCount
-   * 

[01/50] [abbrv] carbondata git commit: [REBASE] Solve conflict after rebasing master [Forced Update!]

2018-03-04 Thread jackylk
Repository: carbondata
Updated Branches:
  refs/heads/carbonstore c738afbc2 -> 8104735fd (forced update)


[REBASE] Solve conflict after rebasing master


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bd40a0d7
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bd40a0d7
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bd40a0d7

Branch: refs/heads/carbonstore
Commit: bd40a0d73d2a7086caaa6773a2c6a1a45e24334c
Parents: 9086a1b
Author: Jacky Li 
Authored: Thu Feb 1 00:25:31 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:30:31 2018 +0800

--
 .../hadoop/util/CarbonInputFormatUtil.java  | 20 +++
 .../spark/rdd/NewCarbonDataLoadRDD.scala| 21 ++--
 .../org/apache/spark/sql/CarbonSession.scala|  5 ++---
 3 files changed, 24 insertions(+), 22 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/bd40a0d7/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
--
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
index 514428b..056c27b 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
@@ -22,6 +22,8 @@ import java.text.SimpleDateFormat;
 import java.util.List;
 import java.util.Locale;
 
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
@@ -39,6 +41,7 @@ import org.apache.carbondata.core.scan.model.QueryMeasure;
 import org.apache.carbondata.core.scan.model.QueryModel;
 import org.apache.carbondata.hadoop.api.CarbonTableInputFormat;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobID;
@@ -159,4 +162,21 @@ public class CarbonInputFormatUtil {
 String jobtrackerID = createJobTrackerID(date);
 return new JobID(jobtrackerID, batch);
   }
+
+  public static void setS3Configurations(Configuration hadoopConf) {
+FileFactory.getConfiguration()
+.set("fs.s3a.access.key", hadoopConf.get("fs.s3a.access.key", ""));
+FileFactory.getConfiguration()
+.set("fs.s3a.secret.key", hadoopConf.get("fs.s3a.secret.key", ""));
+FileFactory.getConfiguration()
+.set("fs.s3a.endpoint", hadoopConf.get("fs.s3a.endpoint", ""));
+FileFactory.getConfiguration().set(CarbonCommonConstants.S3_ACCESS_KEY,
+hadoopConf.get(CarbonCommonConstants.S3_ACCESS_KEY, ""));
+FileFactory.getConfiguration().set(CarbonCommonConstants.S3_SECRET_KEY,
+hadoopConf.get(CarbonCommonConstants.S3_SECRET_KEY, ""));
+FileFactory.getConfiguration().set(CarbonCommonConstants.S3N_ACCESS_KEY,
+hadoopConf.get(CarbonCommonConstants.S3N_ACCESS_KEY, ""));
+FileFactory.getConfiguration().set(CarbonCommonConstants.S3N_SECRET_KEY,
+hadoopConf.get(CarbonCommonConstants.S3N_SECRET_KEY, ""));
+  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/bd40a0d7/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
index 917fc88..e17824f 100644
--- 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
+++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
@@ -41,10 +41,10 @@ import 
org.apache.carbondata.common.logging.LogServiceFactory
 import org.apache.carbondata.common.logging.impl.StandardLogService
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datastore.compression.CompressorFactory
-import org.apache.carbondata.core.datastore.impl.FileFactory
 import org.apache.carbondata.core.statusmanager.{LoadMetadataDetails, 
SegmentStatus}
 import org.apache.carbondata.core.util.{CarbonProperties, 
CarbonTimeStatisticsFactory, ThreadLocalTaskInfo}
 import org.apache.carbondata.core.util.path.CarbonTablePath
+import 

[13/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeColGroupFilterExecuterImpl.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeColGroupFilterExecuterImpl.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeColGroupFilterExecuterImpl.java
index 9391ebd..44f7c07 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeColGroupFilterExecuterImpl.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeColGroupFilterExecuterImpl.java
@@ -16,20 +16,10 @@
  */
 package org.apache.carbondata.core.scan.filter.executer;
 
-import java.util.ArrayList;
 import java.util.BitSet;
-import java.util.List;
 
-import org.apache.carbondata.common.logging.LogService;
-import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
-import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
-import org.apache.carbondata.core.keygenerator.KeyGenException;
-import org.apache.carbondata.core.keygenerator.KeyGenerator;
-import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo;
-import org.apache.carbondata.core.scan.executor.util.QueryUtil;
 import 
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
-import org.apache.carbondata.core.util.ByteUtil;
 
 /**
  * It checks if filter is required on given block and if required, it does
@@ -38,12 +28,6 @@ import org.apache.carbondata.core.util.ByteUtil;
 public class ExcludeColGroupFilterExecuterImpl extends 
ExcludeFilterExecuterImpl {
 
   /**
-   * LOGGER
-   */
-  private static final LogService LOGGER =
-  
LogServiceFactory.getLogService(ExcludeColGroupFilterExecuterImpl.class.getName());
-
-  /**
* @param dimColResolvedFilterInfo
* @param segmentProperties
*/
@@ -53,54 +37,6 @@ public class ExcludeColGroupFilterExecuterImpl extends 
ExcludeFilterExecuterImpl
   }
 
   /**
-   * It fills BitSet with row index which matches filter key
-   */
-  protected BitSet getFilteredIndexes(DimensionColumnDataChunk 
dimensionColumnDataChunk,
-  int numerOfRows) {
-BitSet bitSet = new BitSet(numerOfRows);
-bitSet.flip(0, numerOfRows);
-try {
-  KeyStructureInfo keyStructureInfo = getKeyStructureInfo();
-  byte[][] filterValues = dimColumnExecuterInfo.getExcludeFilterKeys();
-  for (int i = 0; i < filterValues.length; i++) {
-byte[] filterVal = filterValues[i];
-for (int rowId = 0; rowId < numerOfRows; rowId++) {
-  byte[] colData = new 
byte[keyStructureInfo.getMaskByteRanges().length];
-  dimensionColumnDataChunk.fillChunkData(colData, 0, rowId, 
keyStructureInfo);
-  if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterVal, colData) 
== 0) {
-bitSet.flip(rowId);
-  }
-}
-  }
-
-} catch (Exception e) {
-  LOGGER.error(e);
-}
-
-return bitSet;
-  }
-
-  /**
-   * It is required for extracting column data from columngroup chunk
-   *
-   * @return
-   * @throws KeyGenException
-   */
-  private KeyStructureInfo getKeyStructureInfo() throws KeyGenException {
-int colGrpId = getColumnGroupId(dimColEvaluatorInfo.getColumnIndex());
-KeyGenerator keyGenerator = 
segmentProperties.getColumnGroupAndItsKeygenartor().get(colGrpId);
-List mdKeyOrdinal = new ArrayList();
-mdKeyOrdinal.add(getMdkeyOrdinal(dimColEvaluatorInfo.getColumnIndex(), 
colGrpId));
-int[] maskByteRanges = 
QueryUtil.getMaskedByteRangeBasedOrdinal(mdKeyOrdinal, keyGenerator);
-byte[] maxKey = QueryUtil.getMaxKeyBasedOnOrinal(mdKeyOrdinal, 
keyGenerator);
-KeyStructureInfo restructureInfos = new KeyStructureInfo();
-restructureInfos.setKeyGenerator(keyGenerator);
-restructureInfos.setMaskByteRanges(maskByteRanges);
-restructureInfos.setMaxKey(maxKey);
-return restructureInfos;
-  }
-
-  /**
* Check if scan is required on given block based on min and max value
*/
   public BitSet isScanRequired(byte[][] blkMaxVal, byte[][] blkMinVal) {
@@ -109,25 +45,4 @@ public class ExcludeColGroupFilterExecuterImpl extends 
ExcludeFilterExecuterImpl
 return bitSet;
   }
 
-  private int getMdkeyOrdinal(int ordinal, int colGrpId) {
-return segmentProperties.getColumnGroupMdKeyOrdinal(colGrpId, ordinal);
-  }
-
-  private int getColumnGroupId(int ordinal) {
-int[][] columnGroups = segmentProperties.getColumnGroups();
-int colGrpId = -1;
-for (int i = 0; i < columnGroups.length; i++) {
-  if (columnGroups[i].length > 1) {
-colGrpId++;
-if (QueryUtil.searchInArray(columnGroups[i], ordinal)) {
-  break;
-}
-  }
-}
-return colGrpId;
-  }
-
-  public KeyGenerator getKeyGenerator(int 

[48/50] [abbrv] carbondata git commit: [CARBONDATA-1114][Tests] Fix bugs in tests in windows env

2018-03-04 Thread jackylk
[CARBONDATA-1114][Tests] Fix bugs in tests in windows env

Fix bugs in tests that cause failures under the Windows environment

This closes #1994


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ce88eb6a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ce88eb6a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ce88eb6a

Branch: refs/heads/carbonstore
Commit: ce88eb6a2d6d54acf15a2bdf2a9165ecc9570647
Parents: faad967
Author: xuchuanyin 
Authored: Sat Feb 24 21:18:17 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:14 2018 +0800

--
 .../carbondata/core/locks/LocalFileLock.java| 30 +++-
 .../store/impl/DFSFileReaderImplUnitTest.java   | 11 ---
 .../store/impl/FileFactoryImplUnitTest.java | 28 +-
 .../filesystem/HDFSCarbonFileTest.java  |  3 +-
 .../filesystem/LocalCarbonFileTest.java | 20 +
 .../BooleanDataTypesInsertTest.scala|  5 ++--
 .../carbondata/lcm/locks/LocalFileLockTest.java |  2 +-
 .../loading/csvinput/CSVInputFormatTest.java|  1 +
 8 files changed, 64 insertions(+), 36 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce88eb6a/core/src/main/java/org/apache/carbondata/core/locks/LocalFileLock.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/locks/LocalFileLock.java 
b/core/src/main/java/org/apache/carbondata/core/locks/LocalFileLock.java
index 75ea074..cb80877 100644
--- a/core/src/main/java/org/apache/carbondata/core/locks/LocalFileLock.java
+++ b/core/src/main/java/org/apache/carbondata/core/locks/LocalFileLock.java
@@ -17,17 +17,20 @@
 
 package org.apache.carbondata.core.locks;
 
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.nio.channels.FileChannel;
 import java.nio.channels.FileLock;
 import java.nio.channels.OverlappingFileLockException;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
 
 import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.util.CarbonUtil;
 
 /**
  * This class handles the file locking in the local file system.
@@ -40,11 +43,6 @@ public class LocalFileLock extends AbstractCarbonLock {
   private String location;
 
   /**
-   * fileOutputStream of the local lock file
-   */
-  private FileOutputStream fileOutputStream;
-
-  /**
* channel is the FileChannel of the lock file.
*/
   private FileChannel channel;
@@ -104,8 +102,8 @@ public class LocalFileLock extends AbstractCarbonLock {
 FileFactory.createNewLockFile(lockFilePath, 
FileFactory.getFileType(location));
   }
 
-  fileOutputStream = new FileOutputStream(lockFilePath);
-  channel = fileOutputStream.getChannel();
+  channel = FileChannel.open(Paths.get(lockFilePath), 
StandardOpenOption.WRITE,
+  StandardOpenOption.APPEND);
   try {
 fileLock = channel.tryLock();
   } catch (OverlappingFileLockException e) {
@@ -137,11 +135,17 @@ public class LocalFileLock extends AbstractCarbonLock {
 } catch (IOException e) {
   status = false;
 } finally {
-  if (null != fileOutputStream) {
-try {
-  fileOutputStream.close();
-} catch (IOException e) {
-  LOGGER.error(e.getMessage());
+  CarbonUtil.closeStreams(channel);
+
+  // deleting the lock file after releasing the lock.
+  if (null != lockFilePath) {
+CarbonFile lockFile = FileFactory.getCarbonFile(lockFilePath,
+FileFactory.getFileType(lockFilePath));
+if (!lockFile.exists() || lockFile.delete()) {
+  LOGGER.info("Successfully deleted the lock file " + lockFilePath);
+} else {
+  LOGGER.error("Not able to delete the lock file " + lockFilePath);
+  status = false;
 }
   }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce88eb6a/core/src/test/java/org/apache/carbondata/core/carbon/datastorage/filesystem/store/impl/DFSFileReaderImplUnitTest.java
--
diff --git 
a/core/src/test/java/org/apache/carbondata/core/carbon/datastorage/filesystem/store/impl/DFSFileReaderImplUnitTest.java
 
b/core/src/test/java/org/apache/carbondata/core/carbon/datastorage/filesystem/store/impl/DFSFileReaderImplUnitTest.java
index da61a94..30144c1 

[21/50] [abbrv] carbondata git commit: [CARBONDATA-2025] Unify all path construction through CarbonTablePath static method

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f06824e9/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala
index 31a08fc..9afb890 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala
@@ -26,7 +26,7 @@ import 
org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier
 import org.apache.carbondata.core.metadata.CarbonMetadata
 import org.apache.carbondata.core.statusmanager.{SegmentStatus, 
SegmentStatusManager}
 import org.apache.carbondata.core.util.CarbonProperties
-import org.apache.carbondata.core.util.path.CarbonStorePath
+import org.apache.carbondata.core.util.path.CarbonTablePath
 import org.apache.carbondata.hadoop.CacheClient
 import org.apache.spark.sql.test.util.QueryTest
 
@@ -112,11 +112,9 @@ class MajorCompactionIgnoreInMinorTest extends QueryTest 
with BeforeAndAfterAll
   CarbonCommonConstants.DATABASE_DEFAULT_NAME,
   "ignoremajor"
 )
-val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier
 
-val carbontablePath = 
CarbonStorePath.getCarbonTablePath(absoluteTableIdentifier)
-  .getMetadataDirectoryPath
-val segs = SegmentStatusManager.readLoadMetadata(carbontablePath)
+val carbonTablePath = carbonTable.getMetadataPath
+val segs = SegmentStatusManager.readLoadMetadata(carbonTablePath)
 
 // status should remain as compacted.
 assertResult(SegmentStatus.COMPACTED)(segs(3).getSegmentStatus)
@@ -134,9 +132,7 @@ class MajorCompactionIgnoreInMinorTest extends QueryTest 
with BeforeAndAfterAll
   CarbonCommonConstants.DATABASE_DEFAULT_NAME,
   "ignoremajor"
 )
-val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier
-val carbontablePath = CarbonStorePath
-  .getCarbonTablePath(absoluteTableIdentifier).getMetadataDirectoryPath
+val carbontablePath = carbonTable.getMetadataPath
 val segs = SegmentStatusManager.readLoadMetadata(carbontablePath)
 
 // status should remain as compacted for segment 2.

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f06824e9/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala
index 42ac4df..68a3058 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.test.util.QueryTest
 import 
org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore
 import org.apache.carbondata.core.metadata.CarbonMetadata
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable
-import org.apache.carbondata.core.util.path.CarbonStorePath
+import org.apache.carbondata.core.util.path.CarbonTablePath
 
 class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll {
   var filePath: String = _
@@ -193,8 +193,7 @@ class TestBatchSortDataLoad extends QueryTest with 
BeforeAndAfterAll {
   CarbonCommonConstants.DATABASE_DEFAULT_NAME,
   tableName
 )
-val carbonTablePath = 
CarbonStorePath.getCarbonTablePath(carbonTable.getAbsoluteTableIdentifier)
-val segmentDir = carbonTablePath.getCarbonDataDirectoryPath(segmentNo)
+val segmentDir = carbonTable.getSemgentPath(segmentNo)
 new SegmentIndexFileStore().getIndexFilesFromSegment(segmentDir).size()
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f06824e9/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestDataLoadWithFileName.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestDataLoadWithFileName.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestDataLoadWithFileName.scala
index db0a62c..b9d8e12 100644
--- 

[10/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/processor/DataBlockIterator.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/processor/DataBlockIterator.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/processor/DataBlockIterator.java
new file mode 100644
index 000..fde4e55
--- /dev/null
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/processor/DataBlockIterator.java
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.scan.processor;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.carbondata.common.CarbonIterator;
+import org.apache.carbondata.core.datastore.DataRefNode;
+import org.apache.carbondata.core.datastore.FileReader;
+import org.apache.carbondata.core.scan.collector.ResultCollectorFactory;
+import org.apache.carbondata.core.scan.collector.ScannedResultCollector;
+import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo;
+import org.apache.carbondata.core.scan.result.BlockletScannedResult;
+import org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch;
+import org.apache.carbondata.core.scan.scanner.BlockletScanner;
+import org.apache.carbondata.core.scan.scanner.impl.BlockletFilterScanner;
+import org.apache.carbondata.core.scan.scanner.impl.BlockletFullScanner;
+import org.apache.carbondata.core.stats.QueryStatisticsModel;
+import org.apache.carbondata.core.util.TaskMetricsMap;
+
+/**
+ * This abstract class provides a skeletal implementation of the
+ * Block iterator.
+ */
+public class DataBlockIterator extends CarbonIterator> {
+
+  /**
+   * iterator which will be used to iterate over blocklets
+   */
+  private BlockletIterator blockletIterator;
+
+  /**
+   * result collector which will be used to aggregate the scanned result
+   */
+  private ScannedResultCollector scannerResultAggregator;
+
+  /**
+   * processor which will be used to process the block processing can be
+   * filter processing or non filter processing
+   */
+  private BlockletScanner blockletScanner;
+
+  /**
+   * batch size of result
+   */
+  private int batchSize;
+
+  private ExecutorService executorService;
+
+  private Future future;
+
+  private Future futureIo;
+
+  private BlockletScannedResult scannedResult;
+
+  private BlockExecutionInfo blockExecutionInfo;
+
+  private FileReader fileReader;
+
+  private AtomicBoolean nextBlock;
+
+  private AtomicBoolean nextRead;
+
+  public DataBlockIterator(BlockExecutionInfo blockExecutionInfo, FileReader 
fileReader,
+  int batchSize, QueryStatisticsModel queryStatisticsModel, 
ExecutorService executorService) {
+this.blockExecutionInfo = blockExecutionInfo;
+this.fileReader = fileReader;
+blockletIterator = new 
BlockletIterator(blockExecutionInfo.getFirstDataBlock(),
+blockExecutionInfo.getNumberOfBlockToScan());
+if (blockExecutionInfo.getFilterExecuterTree() != null) {
+  blockletScanner = new BlockletFilterScanner(blockExecutionInfo, 
queryStatisticsModel);
+} else {
+  blockletScanner = new BlockletFullScanner(blockExecutionInfo, 
queryStatisticsModel);
+}
+this.scannerResultAggregator =
+ResultCollectorFactory.getScannedResultCollector(blockExecutionInfo);
+this.batchSize = batchSize;
+this.executorService = executorService;
+this.nextBlock = new AtomicBoolean(false);
+this.nextRead = new AtomicBoolean(false);
+  }
+
+  @Override
+  public List next() {
+List collectedResult = null;
+if (updateScanner()) {
+  collectedResult = 
this.scannerResultAggregator.collectResultInRow(scannedResult, batchSize);
+  while (collectedResult.size() < batchSize && updateScanner()) {
+List data = this.scannerResultAggregator
+

[31/50] [abbrv] carbondata git commit: [CARBONDATA-1544][Datamap] Datamap FineGrain implementation

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bb5bb00a/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxDataMapFactory.java
--
diff --git 
a/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxDataMapFactory.java
 
b/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxDataMapFactory.java
index b196d0d..266c107 100644
--- 
a/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxDataMapFactory.java
+++ 
b/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxDataMapFactory.java
@@ -17,7 +17,6 @@
 
 package org.apache.carbondata.datamap.examples;
 
-import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -25,90 +24,94 @@ import java.util.List;
 
 import org.apache.carbondata.core.datamap.DataMapDistributable;
 import org.apache.carbondata.core.datamap.DataMapMeta;
-import org.apache.carbondata.core.datamap.dev.DataMap;
-import org.apache.carbondata.core.datamap.dev.DataMapFactory;
-import org.apache.carbondata.core.datamap.dev.DataMapWriter;
-import org.apache.carbondata.core.events.ChangeEvent;
-import org.apache.carbondata.core.indexstore.schema.FilterType;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.dev.AbstractDataMapWriter;
+import org.apache.carbondata.core.datamap.dev.DataMapModel;
+import 
org.apache.carbondata.core.datamap.dev.cgdatamap.AbstractCoarseGrainDataMap;
+import 
org.apache.carbondata.core.datamap.dev.cgdatamap.AbstractCoarseGrainDataMapFactory;
 import org.apache.carbondata.core.memory.MemoryException;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
-
+import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.events.Event;
 
 /**
  * Min Max DataMap Factory
  */
-public class MinMaxDataMapFactory implements DataMapFactory {
+public class MinMaxDataMapFactory extends AbstractCoarseGrainDataMapFactory {
 
   private AbsoluteTableIdentifier identifier;
 
-  @Override
-  public void init(AbsoluteTableIdentifier identifier, String dataMapName) {
+  @Override public void init(AbsoluteTableIdentifier identifier, String 
dataMapName) {
 this.identifier = identifier;
   }
 
   /**
* createWriter will return the MinMaxDataWriter.
-   * @param segmentId
+   *
+   * @param segment
* @return
*/
-  @Override
-  public DataMapWriter createWriter(String segmentId) {
-return new MinMaxDataWriter();
+  @Override public AbstractDataMapWriter createWriter(Segment segment) {
+return new MinMaxDataWriter(identifier, segment.getSegmentNo(),
+CarbonTablePath.getSegmentPath(identifier.getTablePath(), 
segment.getSegmentNo()));
   }
 
   /**
* getDataMaps Factory method Initializes the Min Max Data Map and returns.
-   * @param segmentId
+   *
+   * @param segment
* @return
* @throws IOException
*/
-  @Override public List getDataMaps(String segmentId) throws 
IOException {
-List dataMapList = new ArrayList<>();
+  @Override public List getDataMaps(Segment 
segment)
+  throws IOException {
+List dataMapList = new ArrayList<>();
 // Form a dataMap of Type MinMaxDataMap.
 MinMaxDataMap dataMap = new MinMaxDataMap();
 try {
-  dataMap.init(identifier.getTablePath() + "/Fact/Part0/Segment_" + 
segmentId + File.separator);
+  dataMap.init(new DataMapModel(
+  CarbonTablePath.getSegmentPath(identifier.getTablePath(), 
segment.getSegmentNo(;
 } catch (MemoryException ex) {
-
+  throw new IOException(ex);
 }
 dataMapList.add(dataMap);
 return dataMapList;
   }
 
   /**
-   *
-   * @param segmentId
+   * @param segment
* @return
*/
-  @Override public List toDistributable(String 
segmentId) {
+  @Override public List toDistributable(Segment segment) 
{
 return null;
   }
 
   /**
* Clear the DataMap.
-   * @param segmentId
+   *
+   * @param segment
*/
-  @Override public void clear(String segmentId) {
+  @Override public void clear(Segment segment) {
   }
 
   /**
* Clearing the data map.
*/
-  @Override
-  public void clear() {
+  @Override public void clear() {
   }
 
-  @Override public DataMap getDataMap(DataMapDistributable distributable) {
+  @Override public List 
getDataMaps(DataMapDistributable distributable)
+  throws IOException {
 return null;
   }
 
-  @Override
-  public void fireEvent(ChangeEvent event) {
+  @Override public void fireEvent(Event event) {
 
   }
 
-  @Override
-  public DataMapMeta getMeta() {
-return new DataMapMeta(new ArrayList(Arrays.asList("c2")), 
FilterType.EQUALTO);
+  @Override public DataMapMeta getMeta() {
+return new DataMapMeta(new 

[05/50] [abbrv] carbondata git commit: [CARBONDATA-1827] S3 Carbon Implementation

2018-03-04 Thread jackylk
[CARBONDATA-1827] S3 Carbon Implementation

1. Provide support for S3 in CarbonData.
2. Added S3Example to create a carbon table on S3.
3. Added S3CSVExample to load a carbon table using CSV from S3.

This closes #1805


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9086a1b9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9086a1b9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9086a1b9

Branch: refs/heads/carbonstore
Commit: 9086a1b9f2cd6cf1d4d42290a4e3678b01472714
Parents: 0c75ab7
Author: SangeetaGulia 
Authored: Thu Sep 21 14:56:26 2017 +0530
Committer: Jacky Li 
Committed: Sun Mar 4 20:30:31 2018 +0800

--
 .../core/constants/CarbonCommonConstants.java   |  21 +++
 .../filesystem/AbstractDFSCarbonFile.java   |  20 ++-
 .../datastore/filesystem/HDFSCarbonFile.java|   5 +-
 .../core/datastore/impl/FileFactory.java|  11 +-
 .../core/locks/CarbonLockFactory.java   |  28 ++--
 .../carbondata/core/locks/S3FileLock.java   | 111 +
 .../carbondata/core/util/CarbonProperties.java  |   3 +-
 .../filesystem/HDFSCarbonFileTest.java  |   8 +-
 examples/spark2/pom.xml |   5 +
 examples/spark2/src/main/resources/data1.csv|  11 ++
 .../carbondata/examples/S3CsvExample.scala  |  99 +++
 .../apache/carbondata/examples/S3Example.scala  | 164 +++
 .../spark/rdd/NewCarbonDataLoadRDD.scala|  42 -
 integration/spark2/pom.xml  |  43 +
 .../spark/rdd/CarbonDataRDDFactory.scala|   3 +-
 .../org/apache/spark/sql/CarbonSession.scala|   3 +
 16 files changed, 554 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/9086a1b9/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index b2a3375..af3ed99 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -167,6 +167,22 @@ public final class CarbonCommonConstants {
   public static final String S3N_PREFIX = "s3n://";
 
   public static final String S3A_PREFIX = "s3a://";
+  /**
+   * Access Key for s3n
+   */
+  public static final String S3N_ACCESS_KEY = "fs.s3n.awsAccessKeyId";
+  /**
+   * Secret Key for s3n
+   */
+  public static final String S3N_SECRET_KEY = "fs.s3n.awsSecretAccessKey";
+  /**
+   * Access Key for s3
+   */
+  public static final String S3_ACCESS_KEY = "fs.s3.awsAccessKeyId";
+  /**
+   * Secret Key for s3
+   */
+  public static final String S3_SECRET_KEY = "fs.s3.awsSecretAccessKey";
 
   /**
* FS_DEFAULT_FS
@@ -937,6 +953,11 @@ public final class CarbonCommonConstants {
   public static final String CARBON_LOCK_TYPE_HDFS = "HDFSLOCK";
 
   /**
+   * S3LOCK TYPE
+   */
+  public static final String CARBON_LOCK_TYPE_S3 = "S3LOCK";
+
+  /**
* Invalid filter member log string
*/
   public static final String FILTER_INVALID_MEMBER =

http://git-wip-us.apache.org/repos/asf/carbondata/blob/9086a1b9/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
index 68eaa21..fd5dc40 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
@@ -51,7 +51,7 @@ import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.io.compress.Lz4Codec;
 import org.apache.hadoop.io.compress.SnappyCodec;
 
-public abstract  class AbstractDFSCarbonFile implements CarbonFile {
+public abstract class AbstractDFSCarbonFile implements CarbonFile {
   /**
* LOGGER
*/
@@ -262,18 +262,28 @@ public abstract  class AbstractDFSCarbonFile implements 
CarbonFile {
   @Override public DataOutputStream getDataOutputStream(String path, 
FileFactory.FileType fileType,
   int bufferSize, boolean append) throws IOException {
 Path pt = new Path(path);
-FileSystem fs = pt.getFileSystem(FileFactory.getConfiguration());
+FileSystem fileSystem = pt.getFileSystem(FileFactory.getConfiguration());
 FSDataOutputStream stream = 

[49/50] [abbrv] carbondata git commit: [CARBONDATA-2186] Add InterfaceAudience.Internal to annotate internal interface

2018-03-04 Thread jackylk
[CARBONDATA-2186] Add InterfaceAudience.Internal to annotate internal interface

This closes #1986


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/623a1f93
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/623a1f93
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/623a1f93

Branch: refs/heads/carbonstore
Commit: 623a1f93bf50bbbf665d98d71fe2190a4742
Parents: 503e0d9
Author: Jacky Li 
Authored: Tue Feb 20 11:16:53 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:14 2018 +0800

--
 .../java/org/apache/carbondata/common/Maps.java  |  2 +-
 .../org/apache/carbondata/common/Strings.java|  2 +-
 .../common/annotations/InterfaceAudience.java| 19 ++-
 .../common/annotations/InterfaceStability.java   |  2 +-
 .../loading/model/CarbonLoadModelBuilder.java|  2 +-
 .../processing/loading/model/LoadOption.java |  2 +-
 .../carbondata/sdk/file/CSVCarbonWriter.java |  4 +---
 7 files changed, 20 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/623a1f93/common/src/main/java/org/apache/carbondata/common/Maps.java
--
diff --git a/common/src/main/java/org/apache/carbondata/common/Maps.java 
b/common/src/main/java/org/apache/carbondata/common/Maps.java
index 14fc329..4e76192 100644
--- a/common/src/main/java/org/apache/carbondata/common/Maps.java
+++ b/common/src/main/java/org/apache/carbondata/common/Maps.java
@@ -21,7 +21,7 @@ import java.util.Map;
 
 import org.apache.carbondata.common.annotations.InterfaceAudience;
 
-@InterfaceAudience.Developer
+@InterfaceAudience.Internal
 public class Maps {
 
   /**

http://git-wip-us.apache.org/repos/asf/carbondata/blob/623a1f93/common/src/main/java/org/apache/carbondata/common/Strings.java
--
diff --git a/common/src/main/java/org/apache/carbondata/common/Strings.java 
b/common/src/main/java/org/apache/carbondata/common/Strings.java
index 08fdc3c..23c7f9f 100644
--- a/common/src/main/java/org/apache/carbondata/common/Strings.java
+++ b/common/src/main/java/org/apache/carbondata/common/Strings.java
@@ -21,7 +21,7 @@ import java.util.Objects;
 
 import org.apache.carbondata.common.annotations.InterfaceAudience;
 
-@InterfaceAudience.Developer
+@InterfaceAudience.Internal
 public class Strings {
 
   /**

http://git-wip-us.apache.org/repos/asf/carbondata/blob/623a1f93/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
--
diff --git 
a/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
 
b/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
index fa9729d..8d214ff 100644
--- 
a/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
+++ 
b/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
@@ -25,10 +25,10 @@ import java.lang.annotation.RetentionPolicy;
  * This annotation is ported and modified from Apache Hadoop project.
  *
  * Annotation to inform users of a package, class or method's intended 
audience.
- * Currently the audience can be {@link User}, {@link Developer}
+ * Currently the audience can be {@link User}, {@link Developer}, {@link 
Internal}
  *
  * Public classes that are not marked with this annotation must be
- * considered by default as {@link Developer}.
+ * considered by default as {@link Internal}.
  *
  * External applications must only use classes that are marked {@link User}.
  *
@@ -47,12 +47,21 @@ public class InterfaceAudience {
   public @interface User { }
 
   /**
-   * Intended only for developers to extend interface for CarbonData project
-   * For example, new Datamap implementations.
+   * Intended for developers to develop extension for Apache CarbonData project
+   * For example, "Index DataMap" to add a new index implementation, etc
*/
   @Documented
   @Retention(RetentionPolicy.RUNTIME)
-  public @interface Developer { }
+  public @interface Developer {
+String[] value();
+  }
+
+  /**
+   * Intended only for internal usage within Apache CarbonData project.
+   */
+  @Documented
+  @Retention(RetentionPolicy.RUNTIME)
+  public @interface Internal { }
 
   private InterfaceAudience() { } // Audience can't exist on its own
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/623a1f93/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceStability.java
--
diff --git 

[29/50] [abbrv] carbondata git commit: [CARBONDATA-2080] [S3-Implementation] Propagated hadoopConf from driver to executor for s3 implementation in cluster mode.

2018-03-04 Thread jackylk
[CARBONDATA-2080] [S3-Implementation] Propagated hadoopConf from driver to 
executor for s3 implementation in cluster mode.

Problem: hadoopConf was not getting propagated from the driver to the executor,
which is why the load was failing in the distributed environment.
Solution: Setting the Hadoop conf in base class CarbonRDD
How to verify this PR:
Execute the load in cluster mode; it should succeed using an S3 location.

This closes #1860


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/dfbdf3db
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/dfbdf3db
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/dfbdf3db

Branch: refs/heads/carbonstore
Commit: dfbdf3db00cbb488e49d3125b4ec93ff9e0dc9b2
Parents: f06824e
Author: Jatin 
Authored: Thu Jan 25 16:53:00 2018 +0530
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:12 2018 +0800

--
 .../spark/rdd/AlterTableAddColumnRDD.scala  |  2 +-
 .../spark/rdd/AlterTableDropColumnRDD.scala |  2 +-
 .../spark/rdd/CarbonCleanFilesRDD.scala |  2 +-
 .../spark/rdd/CarbonDeleteLoadByDateRDD.scala   |  2 +-
 .../spark/rdd/CarbonDeleteLoadRDD.scala |  2 +-
 .../spark/rdd/CarbonDropPartitionRDD.scala  |  2 +-
 .../spark/rdd/CarbonDropTableRDD.scala  |  2 +-
 .../spark/rdd/CarbonGlobalDictionaryRDD.scala   |  3 +-
 .../spark/rdd/CarbonMergeFilesRDD.scala |  0
 .../carbondata/spark/rdd/CarbonMergerRDD.scala  |  2 +-
 .../apache/carbondata/spark/rdd/CarbonRDD.scala | 32 ++--
 .../spark/rdd/NewCarbonDataLoadRDD.scala|  2 +-
 .../carbondata/spark/rdd/SparkDataMapJob.scala  |  2 +-
 .../apache/spark/rdd/DataLoadCoalescedRDD.scala |  3 +-
 .../apache/spark/rdd/UpdateCoalescedRDD.scala   |  2 +-
 .../carbondata/streaming/StreamHandoffRDD.scala |  2 +-
 16 files changed, 45 insertions(+), 17 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/dfbdf3db/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableAddColumnRDD.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableAddColumnRDD.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableAddColumnRDD.scala
index 56a66b9..7c1edea 100644
--- 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableAddColumnRDD.scala
+++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableAddColumnRDD.scala
@@ -50,7 +50,7 @@ class AddColumnPartition(rddId: Int, idx: Int, schema: 
ColumnSchema) extends Par
 class AlterTableAddColumnRDD[K, V](sc: SparkContext,
 @transient newColumns: Seq[ColumnSchema],
 identifier: AbsoluteTableIdentifier)
-  extends CarbonRDD[(Int, SegmentStatus)](sc, Nil) {
+  extends CarbonRDD[(Int, SegmentStatus)](sc, Nil, sc.hadoopConfiguration) {
 
   val lockType: String = 
CarbonProperties.getInstance.getProperty(CarbonCommonConstants.LOCK_TYPE,
 CarbonCommonConstants.CARBON_LOCK_TYPE_HDFS)

http://git-wip-us.apache.org/repos/asf/carbondata/blob/dfbdf3db/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableDropColumnRDD.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableDropColumnRDD.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableDropColumnRDD.scala
index 248f351..e14524e 100644
--- 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableDropColumnRDD.scala
+++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/AlterTableDropColumnRDD.scala
@@ -48,7 +48,7 @@ class DropColumnPartition(rddId: Int, idx: Int, schema: 
ColumnSchema) extends Pa
 class AlterTableDropColumnRDD[K, V](sc: SparkContext,
 @transient newColumns: Seq[ColumnSchema],
 carbonTableIdentifier: AbsoluteTableIdentifier)
-  extends CarbonRDD[(Int, SegmentStatus)](sc, Nil) {
+  extends CarbonRDD[(Int, SegmentStatus)](sc, Nil, sc.hadoopConfiguration) {
 
   override def getPartitions: Array[Partition] = {
 newColumns.zipWithIndex.map { column =>

http://git-wip-us.apache.org/repos/asf/carbondata/blob/dfbdf3db/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonCleanFilesRDD.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonCleanFilesRDD.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonCleanFilesRDD.scala
index 

[44/50] [abbrv] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-03-04 Thread jackylk
[CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

To build an assembly JAR of the store-sdk module, it must not depend on the
carbon-spark module.

This closes #1970


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/dcfe73b8
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/dcfe73b8
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/dcfe73b8

Branch: refs/heads/carbonstore
Commit: dcfe73b8b07267369f8c58130f27b75efccb4ee1
Parents: 5fccdab
Author: Jacky Li 
Authored: Sun Feb 11 21:37:04 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../java/org/apache/carbondata/common/Maps.java |  39 ++
 .../org/apache/carbondata/common/Strings.java   |   3 +
 .../exceptions/TableStatusLockException.java|  34 ++
 .../sql/InvalidLoadOptionException.java |  33 +
 .../sql/MalformedCarbonCommandException.java|  75 +++
 .../sql/MalformedDataMapCommandException.java   |  37 ++
 .../exceptions/sql/NoSuchDataMapException.java  |  39 ++
 .../carbondata/core/datamap/TableDataMap.java   |   5 +-
 .../exception/ConcurrentOperationException.java |  50 ++
 .../statusmanager/SegmentStatusManager.java | 124 
 .../carbondata/core/util/DeleteLoadFolders.java | 210 +++
 .../preaggregate/TestPreAggCreateCommand.scala  |   2 +-
 .../preaggregate/TestPreAggregateDrop.scala |   2 +-
 .../timeseries/TestTimeSeriesCreateTable.scala  |   2 +-
 .../timeseries/TestTimeSeriesDropSuite.scala|   2 +-
 .../TestTimeseriesTableSelection.scala  |   2 +-
 .../TestDataLoadWithColumnsMoreThanSchema.scala |   3 +-
 .../dataload/TestGlobalSortDataLoad.scala   |   2 +-
 .../TestLoadDataWithDiffTimestampFormat.scala   |   2 +-
 .../TestLoadDataWithFileHeaderException.scala   |  11 +-
 ...ataWithMalformedCarbonCommandException.scala |   3 +-
 .../testsuite/dataload/TestLoadOptions.scala|   2 +-
 .../dataload/TestTableLevelBlockSize.scala  |   4 +-
 .../testsuite/datamap/TestDataMapCommand.scala  |   2 +-
 .../dataretention/DataRetentionTestCase.scala   |   2 +-
 .../spark/testsuite/datetype/DateTypeTest.scala |   2 +-
 .../testsuite/sortcolumns/TestSortColumns.scala |   3 +-
 integration/spark-common/pom.xml|   5 -
 .../exception/ConcurrentOperationException.java |  38 --
 .../MalformedCarbonCommandException.java|  69 ---
 .../MalformedDataMapCommandException.java   |  32 -
 .../spark/exception/NoSuchDataMapException.java |  33 -
 .../org/apache/carbondata/api/CarbonStore.scala |   3 +-
 .../spark/CarbonColumnValidator.scala   |   8 +-
 .../carbondata/spark/load/ValidateUtil.scala|  72 ---
 .../carbondata/spark/rdd/CarbonMergerRDD.scala  |   6 +-
 .../carbondata/spark/util/CommonUtil.scala  |  70 +--
 .../carbondata/spark/util/DataLoadingUtil.scala | 610 ---
 .../spark/util/GlobalDictionaryUtil.scala   |   2 +-
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|   4 +-
 .../spark/rdd/CarbonTableCompactor.scala|   2 +-
 .../org/apache/spark/sql/CarbonSource.scala |   2 +-
 .../datamap/CarbonCreateDataMapCommand.scala|   2 +-
 .../datamap/CarbonDropDataMapCommand.scala  |   2 +-
 .../CarbonAlterTableCompactionCommand.scala |  13 +-
 .../management/CarbonLoadDataCommand.scala  |  17 +-
 .../CarbonProjectForDeleteCommand.scala |   2 +-
 .../CarbonProjectForUpdateCommand.scala |   2 +-
 .../command/mutation/IUDCommonUtil.scala|   2 +-
 .../CreatePreAggregateTableCommand.scala|   7 +-
 .../preaaggregate/PreAggregateUtil.scala|   2 +-
 .../schema/CarbonAlterTableRenameCommand.scala  |   3 +-
 .../command/timeseries/TimeSeriesUtil.scala |   2 +-
 .../datasources/CarbonFileFormat.scala  |  14 +-
 .../sql/execution/strategy/DDLStrategy.scala|   2 +-
 .../strategy/StreamingTableStrategy.scala   |   2 +-
 .../execution/command/CarbonHiveCommands.scala  |   2 +-
 .../sql/parser/CarbonSpark2SqlParser.scala  |   2 +-
 .../spark/sql/parser/CarbonSparkSqlParser.scala |   2 +-
 .../org/apache/spark/util/AlterTableUtil.scala  |   2 +-
 .../org/apache/spark/util/TableAPIUtil.scala|   2 +-
 .../spark/sql/hive/CarbonSessionState.scala |   7 +-
 .../segmentreading/TestSegmentReading.scala |   2 +-
 .../spark/util/AllDictionaryTestCase.scala  |   4 +-
 .../util/ExternalColumnDictionaryTestCase.scala |   6 +-
 .../TestStreamingTableOperation.scala   |   4 +-
 .../bucketing/TableBucketingTestCase.scala  |   2 +-
 .../vectorreader/AddColumnTestCases.scala   |   2 +-
 .../loading/model/CarbonLoadModel.java  |  14 +-
 .../loading/model/CarbonLoadModelBuilder.java   | 322 ++
 .../processing/loading/model/LoadOption.java| 251 

[18/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
[CARBONDATA-2099] Refactor query scan process to improve readability

Unified concepts in scan process flow:

1.QueryModel contains all parameter for scan, it is created by API in 
CarbonTable. (In future, CarbonTable will be the entry point for various table 
operations)
2.Use term ColumnChunk to represent one column in one blocklet, and use 
ChunkIndex in reader to read specified column chunk
3.Use term ColumnPage to represent one page in one ColumnChunk
4.QueryColumn => ProjectionColumn, indicating it is for projection

This closes #1874


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/92c9f224
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/92c9f224
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/92c9f224

Branch: refs/heads/carbonstore
Commit: 92c9f224094581378a681fd1f7b0cb02b923687c
Parents: bd40a0d
Author: Jacky Li 
Authored: Tue Jan 30 21:24:04 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:30:32 2018 +0800

--
 .../dictionary/AbstractDictionaryCache.java |   3 +-
 .../cache/dictionary/DictionaryCacheLoader.java |   7 +-
 .../dictionary/DictionaryCacheLoaderImpl.java   |  11 +-
 .../core/datastore/BTreeBuilderInfo.java|   6 -
 .../carbondata/core/datastore/DataRefNode.java  |  81 +--
 .../carbondata/core/datastore/FileHolder.java   | 118 
 .../carbondata/core/datastore/FileReader.java   | 114 +++
 .../core/datastore/block/SegmentProperties.java |  50 +-
 .../chunk/DimensionColumnDataChunk.java | 116 ---
 .../datastore/chunk/DimensionColumnPage.java| 111 +++
 .../chunk/impl/AbstractDimensionColumnPage.java |  89 +++
 .../chunk/impl/AbstractDimensionDataChunk.java  |  95 ---
 .../impl/ColumnGroupDimensionColumnPage.java| 194 ++
 .../impl/ColumnGroupDimensionDataChunk.java | 194 --
 .../chunk/impl/DimensionRawColumnChunk.java |  46 +-
 .../impl/FixedLengthDimensionColumnPage.java| 163 +
 .../impl/FixedLengthDimensionDataChunk.java | 163 -
 .../chunk/impl/MeasureRawColumnChunk.java   |  26 +-
 .../impl/VariableLengthDimensionColumnPage.java | 133 
 .../impl/VariableLengthDimensionDataChunk.java  | 140 
 .../reader/DimensionColumnChunkReader.java  |  14 +-
 .../chunk/reader/MeasureColumnChunkReader.java  |  12 +-
 .../AbstractChunkReaderV2V3Format.java  |  34 +-
 ...mpressedDimensionChunkFileBasedReaderV1.java |  38 +-
 ...mpressedDimensionChunkFileBasedReaderV2.java |  30 +-
 ...essedDimChunkFileBasedPageLevelReaderV3.java |  11 +-
 ...mpressedDimensionChunkFileBasedReaderV3.java |  49 +-
 .../AbstractMeasureChunkReaderV2V3Format.java   |  42 +-
 ...CompressedMeasureChunkFileBasedReaderV1.java |  16 +-
 ...CompressedMeasureChunkFileBasedReaderV2.java |  24 +-
 ...CompressedMeasureChunkFileBasedReaderV3.java |  45 +-
 ...essedMsrChunkFileBasedPageLevelReaderV3.java |   8 +-
 .../chunk/store/ColumnPageWrapper.java  |  30 +-
 .../chunk/store/DimensionDataChunkStore.java|   8 +-
 .../SafeFixedLengthDimensionDataChunkStore.java |   6 +-
 ...feVariableLengthDimensionDataChunkStore.java |   8 +-
 ...nsafeFixedLengthDimensionDataChunkStore.java |  10 +-
 ...afeVariableLengthDimesionDataChunkStore.java |  10 +-
 .../datastore/columnar/ColumnGroupModel.java|  26 -
 .../core/datastore/impl/DFSFileHolderImpl.java  | 166 -
 .../core/datastore/impl/DFSFileReaderImpl.java  | 155 
 .../datastore/impl/DefaultFileTypeProvider.java |  16 +-
 .../core/datastore/impl/FileFactory.java|   4 +-
 .../core/datastore/impl/FileHolderImpl.java | 224 --
 .../core/datastore/impl/FileReaderImpl.java | 215 ++
 .../core/datastore/impl/FileTypeInerface.java   |   4 +-
 .../impl/btree/AbstractBTreeLeafNode.java   |  60 +-
 .../impl/btree/BTreeDataRefNodeFinder.java  |   6 +-
 .../datastore/impl/btree/BTreeNonLeafNode.java  |  52 +-
 .../impl/btree/BlockBTreeLeafNode.java  |   6 +-
 .../impl/btree/BlockletBTreeLeafNode.java   |  46 +-
 .../page/encoding/EncodingFactory.java  |   8 +-
 .../server/NonSecureDictionaryServer.java   |   1 -
 .../core/indexstore/BlockletDetailInfo.java |   4 -
 .../blockletindex/BlockletDataRefNode.java  | 228 ++
 .../BlockletDataRefNodeWrapper.java | 241 ---
 .../indexstore/blockletindex/IndexWrapper.java  |   2 +-
 .../blockletindex/SegmentIndexFileStore.java|   7 +-
 .../core/memory/HeapMemoryAllocator.java|   2 +-
 .../core/metadata/blocklet/SegmentInfo.java |  19 -
 .../core/metadata/schema/table/CarbonTable.java | 130 +++-
 .../schema/table/RelationIdentifier.java|  16 -
 .../core/metadata/schema/table/TableInfo.java   |   6 +-
 .../schema/table/column/CarbonColumn.java   |   2 +-
 .../schema/table/column/CarbonDimension.java|  12 -
 

[39/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row"

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/46031a32/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
index 527452a..11b3d43 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
@@ -31,14 +31,15 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.processing.loading.row.IntermediateSortTempRow;
-import org.apache.carbondata.processing.loading.sort.SortStepRowHandler;
+import org.apache.carbondata.core.util.DataTypeUtil;
+import 
org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage;
 import 
org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException;
-import 
org.apache.carbondata.processing.sort.sortdata.IntermediateSortTempRowComparator;
+import org.apache.carbondata.processing.sort.sortdata.NewRowComparator;
 import org.apache.carbondata.processing.sort.sortdata.SortParameters;
-import org.apache.carbondata.processing.sort.sortdata.TableFieldStat;
 
 public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
 
@@ -62,15 +63,21 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
* entry count
*/
   private int entryCount;
+
   /**
* return row
*/
-  private IntermediateSortTempRow returnRow;
+  private Object[] returnRow;
+  private int dimCnt;
+  private int complexCnt;
+  private int measureCnt;
+  private boolean[] isNoDictionaryDimensionColumn;
+  private DataType[] measureDataTypes;
   private int readBufferSize;
   private String compressorName;
-  private IntermediateSortTempRow[] currentBuffer;
+  private Object[][] currentBuffer;
 
-  private IntermediateSortTempRow[] backupBuffer;
+  private Object[][] backupBuffer;
 
   private boolean isBackupFilled;
 
@@ -93,21 +100,27 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
 
   private int numberOfObjectRead;
 
-  private TableFieldStat tableFieldStat;
-  private SortStepRowHandler sortStepRowHandler;
-  private Comparator comparator;
+  private int nullSetWordsLength;
+
+  private Comparator comparator;
+
   /**
* Constructor to initialize
*/
   public UnsafeSortTempFileChunkHolder(File tempFile, SortParameters 
parameters) {
 // set temp file
 this.tempFile = tempFile;
+this.dimCnt = parameters.getDimColCount();
+this.complexCnt = parameters.getComplexDimColCount();
+this.measureCnt = parameters.getMeasureColCount();
+this.isNoDictionaryDimensionColumn = 
parameters.getNoDictionaryDimnesionColumn();
+this.measureDataTypes = parameters.getMeasureDataType();
 this.readBufferSize = parameters.getBufferSize();
 this.compressorName = parameters.getSortTempCompressorName();
-this.tableFieldStat = new TableFieldStat(parameters);
-this.sortStepRowHandler = new SortStepRowHandler(tableFieldStat);
+
 this.executorService = Executors.newFixedThreadPool(1);
-comparator = new 
IntermediateSortTempRowComparator(parameters.getNoDictionarySortColumn());
+this.nullSetWordsLength = ((parameters.getMeasureColCount() - 1) >> 6) + 1;
+comparator = new NewRowComparator(parameters.getNoDictionarySortColumn());
 initialize();
   }
 
@@ -156,17 +169,11 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
*
* @throws CarbonSortKeyAndGroupByException problem while reading
*/
-  @Override
   public void readRow() throws CarbonSortKeyAndGroupByException {
 if (prefetch) {
   fillDataForPrefetch();
 } else {
-  try {
-this.returnRow = 
sortStepRowHandler.readIntermediateSortTempRowFromInputStream(stream);
-this.numberOfObjectRead++;
-  } catch (IOException e) {
-throw new CarbonSortKeyAndGroupByException("Problems while reading 
row", e);
-  }
+  this.returnRow = getRowFromStream();
 }
   }
 
@@ -200,22 +207,63 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
   }
 
   /**

[36/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/28b5720f/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
index 11b3d43..527452a 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
@@ -31,15 +31,14 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.core.util.DataTypeUtil;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage;
+import org.apache.carbondata.processing.loading.row.IntermediateSortTempRow;
+import org.apache.carbondata.processing.loading.sort.SortStepRowHandler;
 import 
org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException;
-import org.apache.carbondata.processing.sort.sortdata.NewRowComparator;
+import 
org.apache.carbondata.processing.sort.sortdata.IntermediateSortTempRowComparator;
 import org.apache.carbondata.processing.sort.sortdata.SortParameters;
+import org.apache.carbondata.processing.sort.sortdata.TableFieldStat;
 
 public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
 
@@ -63,21 +62,15 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
* entry count
*/
   private int entryCount;
-
   /**
* return row
*/
-  private Object[] returnRow;
-  private int dimCnt;
-  private int complexCnt;
-  private int measureCnt;
-  private boolean[] isNoDictionaryDimensionColumn;
-  private DataType[] measureDataTypes;
+  private IntermediateSortTempRow returnRow;
   private int readBufferSize;
   private String compressorName;
-  private Object[][] currentBuffer;
+  private IntermediateSortTempRow[] currentBuffer;
 
-  private Object[][] backupBuffer;
+  private IntermediateSortTempRow[] backupBuffer;
 
   private boolean isBackupFilled;
 
@@ -100,27 +93,21 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
 
   private int numberOfObjectRead;
 
-  private int nullSetWordsLength;
-
-  private Comparator comparator;
-
+  private TableFieldStat tableFieldStat;
+  private SortStepRowHandler sortStepRowHandler;
+  private Comparator comparator;
   /**
* Constructor to initialize
*/
   public UnsafeSortTempFileChunkHolder(File tempFile, SortParameters 
parameters) {
 // set temp file
 this.tempFile = tempFile;
-this.dimCnt = parameters.getDimColCount();
-this.complexCnt = parameters.getComplexDimColCount();
-this.measureCnt = parameters.getMeasureColCount();
-this.isNoDictionaryDimensionColumn = 
parameters.getNoDictionaryDimnesionColumn();
-this.measureDataTypes = parameters.getMeasureDataType();
 this.readBufferSize = parameters.getBufferSize();
 this.compressorName = parameters.getSortTempCompressorName();
-
+this.tableFieldStat = new TableFieldStat(parameters);
+this.sortStepRowHandler = new SortStepRowHandler(tableFieldStat);
 this.executorService = Executors.newFixedThreadPool(1);
-this.nullSetWordsLength = ((parameters.getMeasureColCount() - 1) >> 6) + 1;
-comparator = new NewRowComparator(parameters.getNoDictionarySortColumn());
+comparator = new 
IntermediateSortTempRowComparator(parameters.getNoDictionarySortColumn());
 initialize();
   }
 
@@ -169,11 +156,17 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
*
* @throws CarbonSortKeyAndGroupByException problem while reading
*/
+  @Override
   public void readRow() throws CarbonSortKeyAndGroupByException {
 if (prefetch) {
   fillDataForPrefetch();
 } else {
-  this.returnRow = getRowFromStream();
+  try {
+this.returnRow = 
sortStepRowHandler.readIntermediateSortTempRowFromInputStream(stream);
+this.numberOfObjectRead++;
+  } catch (IOException e) {
+throw new CarbonSortKeyAndGroupByException("Problems while reading 
row", e);
+  }
 }
   }
 
@@ -207,63 +200,22 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
   }
 
   /**

[17/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
deleted file mode 100644
index 6629d31..000
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.carbondata.core.datastore.chunk.impl;
-
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import 
org.apache.carbondata.core.datastore.chunk.store.DimensionChunkStoreFactory;
-import 
org.apache.carbondata.core.datastore.chunk.store.DimensionChunkStoreFactory.DimensionStoreType;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
-import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo;
-import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
-import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
-
-/**
- * This class is gives access to fixed length dimension data chunk store
- */
-public class FixedLengthDimensionDataChunk extends AbstractDimensionDataChunk {
-
-  /**
-   * Constructor
-   *
-   * @param dataChunkdata chunk
-   * @param invertedIndexinverted index
-   * @param invertedIndexReverse reverse inverted index
-   * @param numberOfRows number of rows
-   * @param columnValueSize  size of each column value
-   */
-  public FixedLengthDimensionDataChunk(byte[] dataChunk, int[] invertedIndex,
-  int[] invertedIndexReverse, int numberOfRows, int columnValueSize) {
-long totalSize = null != invertedIndex ?
-dataChunk.length + (2 * numberOfRows * 
CarbonCommonConstants.INT_SIZE_IN_BYTE) :
-dataChunk.length;
-dataChunkStore = DimensionChunkStoreFactory.INSTANCE
-.getDimensionChunkStore(columnValueSize, null != invertedIndex, 
numberOfRows, totalSize,
-DimensionStoreType.FIXEDLENGTH);
-dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunk);
-  }
-
-  /**
-   * Below method will be used to fill the data based on offset and row id
-   *
-   * @param data data to filed
-   * @param offset   offset from which data need to be filed
-   * @param indexrow id of the chunk
-   * @param keyStructureInfo define the structure of the key
-   * @return how many bytes was copied
-   */
-  @Override public int fillChunkData(byte[] data, int offset, int index,
-  KeyStructureInfo keyStructureInfo) {
-dataChunkStore.fillRow(index, data, offset);
-return dataChunkStore.getColumnValueSize();
-  }
-
-  /**
-   * Converts to column dictionary integer value
-   *
-   * @param rowId
-   * @param columnIndex
-   * @param row
-   * @param restructuringInfo
-   * @return
-   */
-  @Override public int fillConvertedChunkData(int rowId, int columnIndex, 
int[] row,
-  KeyStructureInfo restructuringInfo) {
-row[columnIndex] = dataChunkStore.getSurrogate(rowId);
-return columnIndex + 1;
-  }
-
-  /**
-   * Fill the data to vector
-   *
-   * @param vectorInfo
-   * @param column
-   * @param restructuringInfo
-   * @return next column index
-   */
-  @Override public int fillConvertedChunkData(ColumnVectorInfo[] vectorInfo, 
int column,
-  KeyStructureInfo restructuringInfo) {
-ColumnVectorInfo columnVectorInfo = vectorInfo[column];
-int offset = columnVectorInfo.offset;
-int vectorOffset = columnVectorInfo.vectorOffset;
-int len = columnVectorInfo.size + offset;
-CarbonColumnVector vector = columnVectorInfo.vector;
-for (int j = offset; j < len; j++) {
-  int dict = dataChunkStore.getSurrogate(j);
-  if (columnVectorInfo.directDictionaryGenerator == null) {
-vector.putInt(vectorOffset++, dict);
-  } else {
-Object 

[18/20] carbondata git commit: [CARBONDATA-2213][DataMap] Fixed wrong version for module datamap-example

2018-03-04 Thread jackylk
[CARBONDATA-2213][DataMap] Fixed wrong version for module datamap-example

The version of Module ‘carbondata-datamap-example’ should be 1.4.0-snapshot 
instead of 1.3.0-snapshot, otherwise compilation will fail.

This closes #2011


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/65cb0d61
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/65cb0d61
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/65cb0d61

Branch: refs/heads/datamap-rebase1
Commit: 65cb0d6125a541513fc90640c99d063f913326e7
Parents: 8049185
Author: xuchuanyin 
Authored: Wed Feb 28 10:31:02 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 22:52:05 2018 +0800

--
 datamap/examples/pom.xml|  8 
 .../datamap/examples/MinMaxIndexDataMapFactory.java |  1 -
 .../datamap/lucene/LuceneCoarseGrainDataMap.java|  3 ++-
 .../lucene/LuceneCoarseGrainDataMapFactory.java |  9 ++---
 .../datamap/lucene/LuceneDataMapFactoryBase.java| 16 +++-
 .../datamap/lucene/LuceneDataMapWriter.java |  5 +++--
 .../datamap/lucene/LuceneFineGrainDataMap.java  |  3 ++-
 .../lucene/LuceneFineGrainDataMapFactory.java   |  9 ++---
 8 files changed, 34 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/65cb0d61/datamap/examples/pom.xml
--
diff --git a/datamap/examples/pom.xml b/datamap/examples/pom.xml
index 8539a86..30e1522 100644
--- a/datamap/examples/pom.xml
+++ b/datamap/examples/pom.xml
@@ -22,10 +22,10 @@
   4.0.0
 
   
-  org.apache.carbondata
-  carbondata-parent
-  1.4.0-SNAPSHOT
-  ../../pom.xml
+org.apache.carbondata
+carbondata-parent
+1.4.0-SNAPSHOT
+../../pom.xml
   
 
   carbondata-datamap-examples

http://git-wip-us.apache.org/repos/asf/carbondata/blob/65cb0d61/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxIndexDataMapFactory.java
--
diff --git 
a/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxIndexDataMapFactory.java
 
b/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxIndexDataMapFactory.java
index 9fea55b..45dee2a 100644
--- 
a/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxIndexDataMapFactory.java
+++ 
b/datamap/examples/src/minmaxdatamap/main/java/org/apache/carbondata/datamap/examples/MinMaxIndexDataMapFactory.java
@@ -25,7 +25,6 @@ import java.util.List;
 import org.apache.carbondata.core.datamap.DataMapDistributable;
 import org.apache.carbondata.core.datamap.DataMapMeta;
 import org.apache.carbondata.core.datamap.Segment;
-import org.apache.carbondata.core.datamap.dev.AbstractDataMapWriter;
 import org.apache.carbondata.core.datamap.dev.DataMapModel;
 import org.apache.carbondata.core.datamap.dev.DataMapWriter;
 import org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/65cb0d61/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneCoarseGrainDataMap.java
--
diff --git 
a/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneCoarseGrainDataMap.java
 
b/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneCoarseGrainDataMap.java
index 0b7df86..580f18b 100644
--- 
a/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneCoarseGrainDataMap.java
+++ 
b/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneCoarseGrainDataMap.java
@@ -33,6 +33,7 @@ import 
org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.indexstore.Blocklet;
+import org.apache.carbondata.core.indexstore.PartitionSpec;
 import org.apache.carbondata.core.memory.MemoryException;
 import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
 
@@ -133,7 +134,7 @@ public class LuceneCoarseGrainDataMap extends 
CoarseGrainDataMap {
*/
   @Override
   public List prune(FilterResolverIntf filterExp, SegmentProperties 
segmentProperties,
-  List partitions) throws IOException {
+  List partitions) throws IOException {
 
 // convert filter expr into lucene list query
 List fields = new ArrayList();


[02/20] carbondata git commit: [CARBONDATA-1543] Supported DataMap chooser and expression for supporting multiple datamaps in single query

2018-03-04 Thread jackylk
[CARBONDATA-1543] Supported DataMap chooser and expression for supporting 
multiple datamaps in single query

This PR supports 3 features.

1.Load datamaps from the DataMapSchema which are created through DDL.
2.DataMap Chooser: It chooses a datamap out of the available datamaps based on 
simple logic. For example, if there is a filter condition on column1 and two 
datamaps (1. column1, 2. column1+column2) support this column, then we 
choose the datamap which has fewer columns, i.e. the first datamap.
3.Expression support: Based on the filter expressions we convert them to the 
possible DataMap expressions and do apply expression on it.
For example, there are 2 datamaps available on table1
Datamap1 : column1
Datamap2 : column2
Query: select * from table1 where column1 = 'a' and column2 = 'b'
For the above query, we create the datamap expression 
AndDataMapExpression(Datamap1, DataMap2). So for the above query both 
datamaps are included, and an AND condition is applied to their outputs to 
improve the performance

This closes #1510


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c7a9f15e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c7a9f15e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c7a9f15e

Branch: refs/heads/datamap-rebase1
Commit: c7a9f15e2daa0207862aa28c44c51cc7cc081bac
Parents: 8104735
Author: Ravindra Pesala 
Authored: Tue Nov 21 15:49:11 2017 +0530
Committer: Jacky Li 
Committed: Sun Mar 4 22:00:18 2018 +0800

--
 .../exceptions/MetadataProcessException.java|  37 +++
 .../carbondata/core/datamap/DataMapChooser.java | 284 +++
 .../core/datamap/DataMapDistributable.java  |  21 +-
 .../core/datamap/DataMapStoreManager.java   | 148 +++---
 .../carbondata/core/datamap/TableDataMap.java   |  23 +-
 .../core/datamap/dev/DataMapFactory.java|   3 +-
 .../datamap/dev/expr/AndDataMapExprWrapper.java |  99 +++
 .../dev/expr/DataMapDistributableWrapper.java   |  56 
 .../datamap/dev/expr/DataMapExprWrapper.java|  79 ++
 .../dev/expr/DataMapExprWrapperImpl.java|  88 ++
 .../datamap/dev/expr/OrDataMapExprWrapper.java  |  96 +++
 .../carbondata/core/datastore/TableSpec.java|  14 +-
 .../carbondata/core/indexstore/Blocklet.java|  20 ++
 .../core/indexstore/ExtendedBlocklet.java   |  33 ++-
 .../core/indexstore/FineGrainBlocklet.java  |   8 +
 .../blockletindex/BlockletDataMap.java  |   2 -
 .../blockletindex/BlockletDataMapFactory.java   |   9 +-
 .../conditional/StartsWithExpression.java   |  72 +
 .../scan/filter/FilterExpressionProcessor.java  |  20 +-
 .../core/scan/filter/intf/ExpressionType.java   |   6 +-
 .../statusmanager/SegmentStatusManager.java |   5 +-
 .../datamap/examples/MinMaxDataMapFactory.java  |   3 +-
 .../hadoop/api/CarbonTableInputFormat.java  |  32 ++-
 .../carbondata/hadoop/api/DataMapJob.java   |   2 +-
 .../hadoop/api/DistributableDataMapFormat.java  |  32 ++-
 .../preaggregate/TestPreAggCreateCommand.scala  |   6 +-
 .../timeseries/TestTimeSeriesCreateTable.scala  |   5 +-
 ...CompactionSupportGlobalSortBigFileTest.scala |   2 +-
 .../testsuite/dataload/TestLoadDataFrame.scala  |  24 +-
 .../testsuite/datamap/CGDataMapTestCase.scala   |  52 ++--
 .../testsuite/datamap/DataMapWriterSuite.scala  |  17 +-
 .../testsuite/datamap/FGDataMapTestCase.scala   |  68 +++--
 .../testsuite/datamap/TestDataMapCommand.scala  |  72 ++---
 .../iud/InsertOverwriteConcurrentTest.scala |   0
 .../carbondata/spark/rdd/SparkDataMapJob.scala  |   6 +-
 .../org/apache/spark/sql/CarbonSource.scala |   8 +-
 .../spark/sql/SparkUnknownExpression.scala  |   6 +-
 .../datamap/CarbonCreateDataMapCommand.scala|  91 +++---
 .../datamap/CarbonDropDataMapCommand.scala  |   3 +-
 .../CreatePreAggregateTableCommand.scala|  29 +-
 .../preaaggregate/PreAggregateUtil.scala|  12 +-
 .../strategy/CarbonLateDecodeStrategy.scala |   2 +-
 .../spark/sql/optimizer/CarbonFilters.scala |  20 +-
 .../datamap/DataMapWriterListener.java  |   6 +-
 .../loading/DataLoadProcessBuilder.java |   2 +-
 .../store/CarbonFactDataHandlerModel.java   |  12 +-
 46 files changed, 1316 insertions(+), 319 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/c7a9f15e/common/src/main/java/org/apache/carbondata/common/exceptions/MetadataProcessException.java
--
diff --git 
a/common/src/main/java/org/apache/carbondata/common/exceptions/MetadataProcessException.java
 
b/common/src/main/java/org/apache/carbondata/common/exceptions/MetadataProcessException.java
new file mode 100644
index 

[09/20] carbondata git commit: [HOTFIX] Add Java doc for datamap interface

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1134431d/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/TestIndexDataMapCommand.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/TestIndexDataMapCommand.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/TestIndexDataMapCommand.scala
deleted file mode 100644
index a05a8c2..000
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/TestIndexDataMapCommand.scala
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.spark.testsuite.datamap
-
-import java.io.{File, FilenameFilter}
-
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.test.util.QueryTest
-import org.scalatest.BeforeAndAfterAll
-
-import org.apache.carbondata.common.exceptions.MetadataProcessException
-import 
org.apache.carbondata.common.exceptions.sql.{MalformedDataMapCommandException, 
NoSuchDataMapException}
-import org.apache.carbondata.core.constants.CarbonCommonConstants
-import org.apache.carbondata.core.metadata.CarbonMetadata
-import org.apache.carbondata.core.util.CarbonProperties
-import org.apache.carbondata.core.util.path.CarbonTablePath
-
-class TestIndexDataMapCommand extends QueryTest with BeforeAndAfterAll {
-
-  val testData = s"$resourcesPath/sample.csv"
-
-  override def beforeAll {
-sql("drop table if exists datamaptest")
-sql("drop table if exists datamapshowtest")
-sql("drop table if exists uniqdata")
-sql("create table datamaptest (a string, b string, c string) stored by 
'carbondata'")
-  }
-
-  val newClass = "org.apache.spark.sql.CarbonSource"
-
-  test("test datamap create: don't support using non-exist class") {
-intercept[MetadataProcessException] {
-  sql(s"CREATE DATAMAP datamap1 ON TABLE datamaptest USING '$newClass'")
-}
-  }
-
-  test("test datamap create with dmproperties: don't support using non-exist 
class") {
-intercept[MetadataProcessException] {
-  sql(s"CREATE DATAMAP datamap2 ON TABLE datamaptest USING '$newClass' 
DMPROPERTIES('key'='value')")
-}
-  }
-
-  test("test datamap create with existing name: don't support using non-exist 
class") {
-intercept[MetadataProcessException] {
-  sql(
-s"CREATE DATAMAP datamap2 ON TABLE datamaptest USING '$newClass' 
DMPROPERTIES('key'='value')")
-}
-  }
-
-  test("test datamap create with preagg") {
-sql("drop datamap if exists datamap3 on table datamaptest")
-sql(
-  "create datamap datamap3 on table datamaptest using 'preaggregate' as 
select count(a) from datamaptest")
-val table = CarbonMetadata.getInstance().getCarbonTable("default", 
"datamaptest")
-assert(table != null)
-val dataMapSchemaList = table.getTableInfo.getDataMapSchemaList
-assert(dataMapSchemaList.size() == 1)
-assert(dataMapSchemaList.get(0).getDataMapName.equals("datamap3"))
-
assert(dataMapSchemaList.get(0).getChildSchema.getTableName.equals("datamaptest_datamap3"))
-  }
-
-  test("check hivemetastore after drop datamap") {
-try {
-  CarbonProperties.getInstance()
-.addProperty(CarbonCommonConstants.ENABLE_HIVE_SCHEMA_META_STORE,
-  "true")
-  sql("drop table if exists hiveMetaStoreTable")
-  sql("create table hiveMetaStoreTable (a string, b string, c string) 
stored by 'carbondata'")
-
-  sql(
-"create datamap datamap_hiveMetaStoreTable on table hiveMetaStoreTable 
using 'preaggregate' as select count(a) from hiveMetaStoreTable")
-  checkExistence(sql("show datamap on table hiveMetaStoreTable"), true, 
"datamap_hiveMetaStoreTable")
-
-  sql("drop datamap datamap_hiveMetaStoreTable on table 
hiveMetaStoreTable")
-  checkExistence(sql("show datamap on table hiveMetaStoreTable"), false, 
"datamap_hiveMetaStoreTable")
-
-} finally {
-  sql("drop table hiveMetaStoreTable")
-  CarbonProperties.getInstance()
-.addProperty(CarbonCommonConstants.ENABLE_HIVE_SCHEMA_META_STORE,
- 

[04/20] carbondata git commit: [CARBONDATA-2189] Add DataMapProvider developer interface

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f8ded96e/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/IndexDataMapWriterSuite.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/IndexDataMapWriterSuite.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/IndexDataMapWriterSuite.scala
new file mode 100644
index 000..795ef6a
--- /dev/null
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/IndexDataMapWriterSuite.scala
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.datamap
+
+import java.util
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.test.util.QueryTest
+import org.apache.spark.sql.{DataFrame, SaveMode}
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datamap.{DataMapDistributable, DataMapMeta, 
DataMapStoreManager, Segment}
+import org.apache.carbondata.core.datamap.dev.{AbstractDataMapWriter}
+import org.apache.carbondata.core.datamap.dev.AbstractDataMapWriter
+import 
org.apache.carbondata.core.datamap.dev.cgdatamap.{AbstractCoarseGrainIndexDataMap,
 AbstractCoarseGrainIndexDataMapFactory}
+import org.apache.carbondata.core.datamap.{DataMapDistributable, DataMapMeta}
+import org.apache.carbondata.core.datastore.page.ColumnPage
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
+import org.apache.carbondata.core.metadata.schema.table.DataMapSchema
+import org.apache.carbondata.core.scan.filter.intf.ExpressionType
+import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.carbondata.events.Event
+
+class C2IndexDataMapFactory() extends AbstractCoarseGrainIndexDataMapFactory {
+
+  var identifier: AbsoluteTableIdentifier = _
+
+  override def init(identifier: AbsoluteTableIdentifier,
+  dataMapSchema: DataMapSchema): Unit = {
+this.identifier = identifier
+  }
+
+  override def fireEvent(event: Event): Unit = ???
+
+  override def clear(segment: Segment): Unit = {}
+
+  override def clear(): Unit = {}
+
+  override def getDataMaps(distributable: DataMapDistributable): 
util.List[AbstractCoarseGrainIndexDataMap] = ???
+
+  override def getDataMaps(segment: Segment): 
util.List[AbstractCoarseGrainIndexDataMap] = ???
+
+  override def createWriter(segment: Segment, dataWritePath: String): 
AbstractDataMapWriter =
+IndexDataMapWriterSuite.dataMapWriterC2Mock(identifier, segment, 
dataWritePath)
+
+  override def getMeta: DataMapMeta = new DataMapMeta(List("c2").asJava, 
List(ExpressionType.EQUALS).asJava)
+
+  /**
+   * Get all distributable objects of a segmentid
+   *
+   * @return
+   */
+  override def toDistributable(segmentId: Segment): 
util.List[DataMapDistributable] = {
+???
+  }
+
+}
+
+class IndexDataMapWriterSuite extends QueryTest with BeforeAndAfterAll {
+  def buildTestData(numRows: Int): DataFrame = {
+import sqlContext.implicits._
+sqlContext.sparkContext.parallelize(1 to numRows, 1)
+  .map(x => ("a" + x, "b", x))
+  .toDF("c1", "c2", "c3")
+  }
+
+  def dropTable(): Unit = {
+sql("DROP TABLE IF EXISTS carbon1")
+sql("DROP TABLE IF EXISTS carbon2")
+  }
+
+  override def beforeAll {
+dropTable()
+  }
+
+  test("test write datamap 2 pages") {
+sql(s"CREATE TABLE carbon1(c1 STRING, c2 STRING, c3 INT) STORED BY 
'org.apache.carbondata.format'")
+// register datamap writer
+sql(s"CREATE DATAMAP test ON TABLE carbon1 USING 
'${classOf[C2IndexDataMapFactory].getName}'")
+val df = buildTestData(33000)
+
+// save dataframe to carbon file
+df.write
+  .format("carbondata")
+  .option("tableName", "carbon1")
+  .option("tempCSV", "false")
+  .option("sort_columns","c1")
+  .mode(SaveMode.Overwrite)
+  .save()
+
+assert(IndexDataMapWriterSuite.callbackSeq.head.contains("block start"))
+assert(IndexDataMapWriterSuite.callbackSeq.last.contains("block 

[11/20] carbondata git commit: [HOTFIX] Add Java doc for datamap interface

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1134431d/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletIndexDataMap.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletIndexDataMap.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletIndexDataMap.java
deleted file mode 100644
index 34e11ac..000
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletIndexDataMap.java
+++ /dev/null
@@ -1,971 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.carbondata.core.indexstore.blockletindex;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.math.BigDecimal;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.Comparator;
-import java.util.List;
-
-import org.apache.carbondata.common.logging.LogService;
-import org.apache.carbondata.common.logging.LogServiceFactory;
-import org.apache.carbondata.core.cache.Cacheable;
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import org.apache.carbondata.core.datamap.dev.DataMapModel;
-import 
org.apache.carbondata.core.datamap.dev.cgdatamap.AbstractCoarseGrainIndexDataMap;
-import org.apache.carbondata.core.datastore.IndexKey;
-import org.apache.carbondata.core.datastore.block.SegmentProperties;
-import org.apache.carbondata.core.datastore.block.TableBlockInfo;
-import org.apache.carbondata.core.indexstore.BlockMetaInfo;
-import org.apache.carbondata.core.indexstore.Blocklet;
-import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
-import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
-import org.apache.carbondata.core.indexstore.PartitionSpec;
-import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore;
-import org.apache.carbondata.core.indexstore.row.DataMapRow;
-import org.apache.carbondata.core.indexstore.row.DataMapRowImpl;
-import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema;
-import org.apache.carbondata.core.memory.MemoryException;
-import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
-import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
-import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
-import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
-import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
-import org.apache.carbondata.core.scan.filter.FilterExpressionProcessor;
-import org.apache.carbondata.core.scan.filter.FilterUtil;
-import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
-import 
org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor;
-import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
-import org.apache.carbondata.core.util.ByteUtil;
-import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.core.util.DataFileFooterConverter;
-import org.apache.carbondata.core.util.DataTypeUtil;
-import org.apache.carbondata.core.util.path.CarbonTablePath;
-
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.fs.Path;
-import org.xerial.snappy.Snappy;
-
-/**
- * Datamap implementation for blocklet.
- */
-public class BlockletIndexDataMap extends AbstractCoarseGrainIndexDataMap 
implements Cacheable {
-
-  private static final LogService LOGGER =
-  LogServiceFactory.getLogService(BlockletIndexDataMap.class.getName());
-
-  private static int KEY_INDEX = 0;
-
-  private static int MIN_VALUES_INDEX = 1;
-
-  private static int MAX_VALUES_INDEX = 2;
-
-  private static int ROW_COUNT_INDEX = 3;
-
-  private static int FILE_PATH_INDEX = 4;
-
-  private 

[03/20] carbondata git commit: [CARBONDATA-2189] Add DataMapProvider developer interface

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f8ded96e/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateTableHelper.scala
--
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateTableHelper.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateTableHelper.scala
new file mode 100644
index 000..4c0e637
--- /dev/null
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateTableHelper.scala
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.preaaggregate
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.execution.command._
+import org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand
+import org.apache.spark.sql.execution.command.table.CarbonCreateTableCommand
+import org.apache.spark.sql.execution.command.timeseries.TimeSeriesUtil
+import org.apache.spark.sql.optimizer.CarbonFilters
+import org.apache.spark.sql.parser.CarbonSpark2SqlParser
+
+import 
org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.metadata.schema.datamap.DataMapProvider
+import 
org.apache.carbondata.core.metadata.schema.table.AggregationDataMapSchema
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable
+import org.apache.carbondata.core.metadata.schema.datamap.DataMapProvider
+import 
org.apache.carbondata.core.metadata.schema.table.{AggregationDataMapSchema, 
CarbonTable, DataMapSchema}
+import org.apache.carbondata.core.statusmanager.{SegmentStatus, 
SegmentStatusManager}
+
+/**
+ * Below helper class will be used to create pre-aggregate table
+ * and updating the parent table about the child table information
+ * It will be either success or nothing happen in case of failure:
+ * 1. failed to create pre aggregate table.
+ * 2. failed to update main table
+ *
+ */
+case class PreAggregateTableHelper(
+var parentTable: CarbonTable,
+dataMapName: String,
+dataMapClassName: String,
+dataMapProperties: java.util.Map[String, String],
+queryString: String,
+timeSeriesFunction: Option[String] = None,
+ifNotExistsSet: Boolean = false) {
+
+  var loadCommand: CarbonLoadDataCommand = _
+
+  def initMeta(sparkSession: SparkSession): Seq[Row] = {
+val dmProperties = dataMapProperties.asScala
+val updatedQuery = new 
CarbonSpark2SqlParser().addPreAggFunction(queryString)
+val df = sparkSession.sql(updatedQuery)
+val fieldRelationMap = 
PreAggregateUtil.validateActualSelectPlanAndGetAttributes(
+  df.logicalPlan, queryString)
+val fields = fieldRelationMap.keySet.toSeq
+val tableProperties = mutable.Map[String, String]()
+dmProperties.foreach(t => tableProperties.put(t._1, t._2))
+
+val selectTable = PreAggregateUtil.getParentCarbonTable(df.logicalPlan)
+if (!parentTable.getTableName.equalsIgnoreCase(selectTable.getTableName)) {
+  throw new MalformedDataMapCommandException(
+"Parent table name is different in select and create")
+}
+var neworder = Seq[String]()
+val parentOrder = 
parentTable.getSortColumns(parentTable.getTableName).asScala
+parentOrder.foreach(parentcol =>
+  fields.filter(col => fieldRelationMap(col).aggregateFunction.isEmpty &&
+   parentcol.equals(fieldRelationMap(col).
+ columnTableRelationList.get(0).parentColumnName))
+.map(cols => neworder :+= cols.column)
+)
+tableProperties.put(CarbonCommonConstants.SORT_COLUMNS, 
neworder.mkString(","))
+tableProperties.put("sort_scope", parentTable.getTableInfo.getFactTable.
+  getTableProperties.asScala.getOrElse("sort_scope", CarbonCommonConstants
+  .LOAD_SORT_SCOPE_DEFAULT))
+tableProperties
+   

[06/20] carbondata git commit: [CARBONDATA-2189] Add DataMapProvider developer interface

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f8ded96e/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletIndexDataMap.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletIndexDataMap.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletIndexDataMap.java
new file mode 100644
index 000..34e11ac
--- /dev/null
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletIndexDataMap.java
@@ -0,0 +1,971 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.blockletindex;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogService;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cacheable;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datamap.dev.DataMapModel;
+import 
org.apache.carbondata.core.datamap.dev.cgdatamap.AbstractCoarseGrainIndexDataMap;
+import org.apache.carbondata.core.datastore.IndexKey;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.block.TableBlockInfo;
+import org.apache.carbondata.core.indexstore.BlockMetaInfo;
+import org.apache.carbondata.core.indexstore.Blocklet;
+import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
+import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
+import org.apache.carbondata.core.indexstore.PartitionSpec;
+import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore;
+import org.apache.carbondata.core.indexstore.row.DataMapRow;
+import org.apache.carbondata.core.indexstore.row.DataMapRowImpl;
+import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema;
+import org.apache.carbondata.core.memory.MemoryException;
+import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
+import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
+import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
+import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
+import org.apache.carbondata.core.scan.filter.FilterExpressionProcessor;
+import org.apache.carbondata.core.scan.filter.FilterUtil;
+import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
+import 
org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor;
+import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
+import org.apache.carbondata.core.util.ByteUtil;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataFileFooterConverter;
+import org.apache.carbondata.core.util.DataTypeUtil;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.xerial.snappy.Snappy;
+
+/**
+ * Datamap implementation for blocklet.
+ */
+public class BlockletIndexDataMap extends AbstractCoarseGrainIndexDataMap 
implements Cacheable {
+
+  private static final LogService LOGGER =
+  LogServiceFactory.getLogService(BlockletIndexDataMap.class.getName());
+
+  private static int KEY_INDEX = 0;
+
+  private static int MIN_VALUES_INDEX = 1;
+
+  private static int MAX_VALUES_INDEX = 2;
+
+  private static int ROW_COUNT_INDEX = 3;
+
+  private static int FILE_PATH_INDEX = 4;
+
+  private static 

[14/20] carbondata git commit: [HOTFIX] Fix timestamp issue in TestSortColumnsWithUnsafe

2018-03-04 Thread jackylk
[HOTFIX] Fix timestamp issue in TestSortColumnsWithUnsafe

Fix timestamp issue in TestSortColumnsWithUnsafe

This closes #2001


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/96ee82b3
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/96ee82b3
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/96ee82b3

Branch: refs/heads/datamap-rebase1
Commit: 96ee82b336b178c7a0d5a6d8aa8fd14afc7bc27e
Parents: 1134431
Author: Jacky Li 
Authored: Tue Feb 27 13:06:02 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 22:43:49 2018 +0800

--
 .../testsuite/sortcolumns/TestSortColumns.scala | 14 +--
 .../sortcolumns/TestSortColumnsWithUnsafe.scala | 25 +++-
 2 files changed, 15 insertions(+), 24 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/96ee82b3/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala
index 13db652..adf8423 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala
@@ -37,7 +37,7 @@ class TestSortColumns extends QueryTest with 
BeforeAndAfterAll {
 CarbonProperties.getInstance()
   .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-")
 sql("CREATE TABLE origintable1 (empno int, empname String, designation 
String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, 
deptno int, deptname String, projectcode int, projectjoindate Timestamp, 
projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 
'org.apache.carbondata.format'")
-sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE 
origintable1 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""")
+sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE 
origintable1 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"', 
'TIMESTAMPFORMAT'='dd-MM-')""")
 
 sql("CREATE TABLE tableOne(id int, name string, city string, age int) 
STORED BY 'org.apache.carbondata.format'")
 sql("CREATE TABLE tableTwo(id int, age int) STORED BY 
'org.apache.carbondata.format'")
@@ -244,7 +244,7 @@ class TestSortColumns extends QueryTest with 
BeforeAndAfterAll {
   sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE 
origintable1 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""")
   setLoadingProperties("false", "false", "false")
   sql("CREATE TABLE unsortedtable_heap_safe (empno int, empname String, 
designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname 
String, deptno int, deptname String, projectcode int, projectjoindate 
Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) 
STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='')")
-  sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE 
unsortedtable_heap_safe OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""")
+  sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE 
unsortedtable_heap_safe OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"', 
'TIMESTAMPFORMAT'='dd-MM-')""")
   checkAnswer(sql("select * from unsortedtable_heap_safe where empno = 
11"), sql("select * from origintable1 where empno = 11"))
   checkAnswer(sql("select * from unsortedtable_heap_safe order by empno"), 
sql("select * from origintable1 order by empno"))
 } finally {
@@ -259,7 +259,7 @@ class TestSortColumns extends QueryTest with 
BeforeAndAfterAll {
   sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE 
origintable1 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""")
   setLoadingProperties("false", "true", "false")
   sql("CREATE TABLE unsortedtable_heap_unsafe (empno int, empname String, 
designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname 
String, deptno int, deptname String, projectcode int, projectjoindate 
Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) 
STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='')")
-  sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE 
unsortedtable_heap_unsafe OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""")
+  sql(s"""LOAD DATA local inpath 

[12/20] carbondata git commit: [HOTFIX] Add dava doc for datamap interface

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1134431d/core/src/main/java/org/apache/carbondata/core/indexstore/FineGrainBlocklet.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/FineGrainBlocklet.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/FineGrainBlocklet.java
deleted file mode 100644
index 229e5bf..000
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/FineGrainBlocklet.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.carbondata.core.indexstore;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.List;
-
-import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants;
-import org.apache.carbondata.core.metadata.schema.table.Writable;
-import org.apache.carbondata.core.util.BitSetGroup;
-
-/**
- * FineGrainBlocklet
- */
-public class FineGrainBlocklet extends Blocklet implements Serializable {
-
-  private List<Page> pages;
-
-  public FineGrainBlocklet(String blockId, String blockletId, List<Page> 
pages) {
-super(blockId, blockletId);
-this.pages = pages;
-  }
-
-  // For serialization purpose
-  public FineGrainBlocklet() {
-
-  }
-
-  public List<Page> getPages() {
-return pages;
-  }
-
-  public static class Page implements Writable,Serializable {
-
-private int pageId;
-
-private int[] rowId;
-
-public BitSet getBitSet() {
-  BitSet bitSet =
-  new 
BitSet(CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT);
-  for (int row : rowId) {
-bitSet.set(row);
-  }
-  return bitSet;
-}
-
-@Override public void write(DataOutput out) throws IOException {
-  out.writeInt(pageId);
-  out.writeInt(rowId.length);
-  for (int i = 0; i < rowId.length; i++) {
-out.writeInt(rowId[i]);
-  }
-}
-
-@Override public void readFields(DataInput in) throws IOException {
-  pageId = in.readInt();
-  int length = in.readInt();
-  rowId = new int[length];
-  for (int i = 0; i < length; i++) {
-rowId[i] = in.readInt();
-  }
-}
-
-public void setPageId(int pageId) {
-  this.pageId = pageId;
-}
-
-public void setRowId(int[] rowId) {
-  this.rowId = rowId;
-}
-  }
-
-  public BitSetGroup getBitSetGroup(int numberOfPages) {
-BitSetGroup bitSetGroup = new BitSetGroup(numberOfPages);
-for (int i = 0; i < pages.size(); i++) {
-  bitSetGroup.setBitSet(pages.get(i).getBitSet(), pages.get(i).pageId);
-}
-return bitSetGroup;
-  }
-
-  @Override public void write(DataOutput out) throws IOException {
-super.write(out);
-int size = pages.size();
-out.writeInt(size);
-for (Page page : pages) {
-  page.write(out);
-}
-  }
-
-  @Override public void readFields(DataInput in) throws IOException {
-super.readFields(in);
-int size = in.readInt();
-pages = new ArrayList<>(size);
-for (int i = 0; i < size; i++) {
-  Page page = new Page();
-  page.readFields(in);
-  pages.add(page);
-}
-  }
-
-  @Override public boolean equals(Object o) {
-return super.equals(o);
-  }
-
-  @Override public int hashCode() {
-return super.hashCode();
-  }
-}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1134431d/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
new file mode 100644
index 000..3ca9c5a
--- /dev/null
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
@@ -0,0 +1,971 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional 

[19/20] carbondata git commit: [CARBONDATA-2216][Test] Fix bugs in sdv tests

2018-03-04 Thread jackylk
[CARBONDATA-2216][Test] Fix bugs in sdv tests

This closes #2012


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/62e33e5f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/62e33e5f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/62e33e5f

Branch: refs/heads/datamap-rebase1
Commit: 62e33e5ff73914ce67dfe2d3b8bff906aa8df15e
Parents: 65cb0d6
Author: xuchuanyin 
Authored: Wed Feb 28 16:02:55 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 22:52:56 2018 +0800

--
 .../cluster/sdv/generated/MergeIndexTestCase.scala   | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/62e33e5f/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/MergeIndexTestCase.scala
--
diff --git 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/MergeIndexTestCase.scala
 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/MergeIndexTestCase.scala
index c0abe4e..b886b11 100644
--- 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/MergeIndexTestCase.scala
+++ 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/MergeIndexTestCase.scala
@@ -92,18 +92,29 @@ class MergeIndexTestCase extends QueryTest with 
BeforeAndAfterAll {
 val table = 
CarbonMetadata.getInstance().getCarbonTable("default","carbon_automation_nonmerge")
 new CarbonIndexFileMergeWriter().mergeCarbonIndexFilesOfSegment("0.1", 
table.getTablePath, false)
 assert(getIndexFileCount("default", "carbon_automation_nonmerge", "0.1") 
== 0)
+assert(getMergedIndexFileCount("default", "carbon_automation_nonmerge", 
"0.1") == 1)
 checkAnswer(sql("""Select count(*) from carbon_automation_nonmerge"""), 
rows)
   }
 
   private def getIndexFileCount(dbName: String, tableName: String, segment: 
String): Int = {
+getFileCount(dbName, tableName, segment, CarbonTablePath.INDEX_FILE_EXT)
+  }
+
+  private def getMergedIndexFileCount(dbName: String, tableName: String, 
segment: String): Int = {
+getFileCount(dbName, tableName, segment, 
CarbonTablePath.MERGE_INDEX_FILE_EXT)
+  }
+
+  private def getFileCount(dbName: String,
+  tableName: String,
+  segment: String,
+  suffix: String): Int = {
 val carbonTable = CarbonMetadata.getInstance().getCarbonTable(dbName, 
tableName)
 val identifier = carbonTable.getAbsoluteTableIdentifier
 val path = CarbonTablePath
   .getSegmentPath(identifier.getTablePath, segment)
 val carbonFiles = FileFactory.getCarbonFile(path).listFiles(new 
CarbonFileFilter {
   override def accept(file: CarbonFile): Boolean = {
-file.getName.endsWith(CarbonTablePath
-  .INDEX_FILE_EXT)
+file.getName.endsWith(suffix)
   }
 })
 if (carbonFiles != null) {



[20/20] carbondata git commit: [REBASE] Fix style after rebasing master

2018-03-04 Thread jackylk
[REBASE] Fix style after rebasing master


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f9139930
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f9139930
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f9139930

Branch: refs/heads/datamap-rebase1
Commit: f9139930ec8ffe60a049e6dac2adf37f262dca59
Parents: 62e33e5
Author: Jacky Li 
Authored: Sun Mar 4 22:57:02 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 22:57:02 2018 +0800

--
 .../carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java | 2 +-
 .../carbondata/core/datamap/dev/expr/DataMapExprWrapper.java| 2 +-
 .../core/datamap/dev/expr/DataMapExprWrapperImpl.java   | 2 +-
 .../carbondata/core/datamap/dev/expr/OrDataMapExprWrapper.java  | 2 +-
 .../apache/carbondata/hadoop/api/CarbonTableInputFormat.java| 3 ++-
 .../command/preaaggregate/PreAggregateTableHelper.scala | 5 -
 .../scala/org/apache/spark/sql/optimizer/CarbonFilters.scala| 4 +---
 7 files changed, 7 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/f9139930/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java
index 12b60b4..74469d7 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java
@@ -20,8 +20,8 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.datamap.DataMapLevel;
+import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
 import org.apache.carbondata.core.indexstore.PartitionSpec;
 import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f9139930/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java
index ddb19e9..14cfc33 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java
@@ -20,8 +20,8 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.util.List;
 
-import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.datamap.DataMapLevel;
+import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
 import org.apache.carbondata.core.indexstore.PartitionSpec;
 import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f9139930/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java
index d4be416..c6b011c 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java
@@ -22,8 +22,8 @@ import java.util.List;
 import java.util.UUID;
 
 import org.apache.carbondata.core.datamap.DataMapDistributable;
-import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.datamap.DataMapLevel;
+import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.datamap.TableDataMap;
 import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
 import org.apache.carbondata.core.indexstore.PartitionSpec;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f9139930/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/OrDataMapExprWrapper.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/OrDataMapExprWrapper.java
 

[07/20] carbondata git commit: [CARBONDATA-2189] Add DataMapProvider developer interface

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f8ded96e/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
deleted file mode 100644
index 90178b1..000
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
+++ /dev/null
@@ -1,971 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.carbondata.core.indexstore.blockletindex;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.math.BigDecimal;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.Comparator;
-import java.util.List;
-
-import org.apache.carbondata.common.logging.LogService;
-import org.apache.carbondata.common.logging.LogServiceFactory;
-import org.apache.carbondata.core.cache.Cacheable;
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import org.apache.carbondata.core.datamap.dev.DataMapModel;
-import 
org.apache.carbondata.core.datamap.dev.cgdatamap.AbstractCoarseGrainDataMap;
-import org.apache.carbondata.core.datastore.IndexKey;
-import org.apache.carbondata.core.datastore.block.SegmentProperties;
-import org.apache.carbondata.core.datastore.block.TableBlockInfo;
-import org.apache.carbondata.core.indexstore.BlockMetaInfo;
-import org.apache.carbondata.core.indexstore.Blocklet;
-import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
-import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
-import org.apache.carbondata.core.indexstore.PartitionSpec;
-import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore;
-import org.apache.carbondata.core.indexstore.row.DataMapRow;
-import org.apache.carbondata.core.indexstore.row.DataMapRowImpl;
-import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema;
-import org.apache.carbondata.core.memory.MemoryException;
-import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
-import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
-import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
-import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
-import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
-import org.apache.carbondata.core.scan.filter.FilterExpressionProcessor;
-import org.apache.carbondata.core.scan.filter.FilterUtil;
-import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
-import 
org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor;
-import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
-import org.apache.carbondata.core.util.ByteUtil;
-import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.core.util.DataFileFooterConverter;
-import org.apache.carbondata.core.util.DataTypeUtil;
-import org.apache.carbondata.core.util.path.CarbonTablePath;
-
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.fs.Path;
-import org.xerial.snappy.Snappy;
-
-/**
- * Datamap implementation for blocklet.
- */
-public class BlockletDataMap extends AbstractCoarseGrainDataMap implements 
Cacheable {
-
-  private static final LogService LOGGER =
-  LogServiceFactory.getLogService(BlockletDataMap.class.getName());
-
-  private static int KEY_INDEX = 0;
-
-  private static int MIN_VALUES_INDEX = 1;
-
-  private static int MAX_VALUES_INDEX = 2;
-
-  private static int ROW_COUNT_INDEX = 3;
-
-  private static int FILE_PATH_INDEX = 4;
-
-  private static int PAGE_COUNT_INDEX = 5;
-
-  

[17/20] carbondata git commit: [CARBONDATA-2206] Fixed lucene datamap evaluation issue in executor

2018-03-04 Thread jackylk
[CARBONDATA-2206] Fixed lucene datamap evaluation issue in executor

In case of MatchExpression it should return same bitset from 
RowLevelFilterExecuterImpl

This closes #2010


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8049185c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8049185c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8049185c

Branch: refs/heads/datamap-rebase1
Commit: 8049185cbdb001588be5c5d97f73bac035d16d6c
Parents: c0133aa
Author: ravipesala 
Authored: Tue Feb 27 19:14:15 2018 +0530
Committer: Jacky Li 
Committed: Sun Mar 4 22:45:17 2018 +0800

--
 .../carbondata/core/datamap/DataMapChooser.java |  3 +-
 .../core/scan/expression/MatchExpression.java   | 57 
 .../executer/RowLevelFilterExecuterImpl.java|  4 ++
 .../lucene/LuceneFineGrainDataMapSuite.scala|  8 ++-
 .../datamap/expression/MatchExpression.java | 56 ---
 .../spark/sql/optimizer/CarbonFilters.scala |  3 +-
 6 files changed, 67 insertions(+), 64 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8049185c/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
index c8c971d..f9214a8 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
@@ -228,7 +228,8 @@ public class DataMapChooser {
 
   private boolean contains(DataMapMeta mapMeta, List 
columnExpressions,
   Set expressionTypes) {
-if (mapMeta.getOptimizedOperation().contains(ExpressionType.TEXT_MATCH)) {
+if (mapMeta.getOptimizedOperation().contains(ExpressionType.TEXT_MATCH) &&
+expressionTypes.contains(ExpressionType.TEXT_MATCH)) {
   // TODO: fix it with right logic
   return true;
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8049185c/core/src/main/java/org/apache/carbondata/core/scan/expression/MatchExpression.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/expression/MatchExpression.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/expression/MatchExpression.java
new file mode 100644
index 000..3677b51
--- /dev/null
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/expression/MatchExpression.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.scan.expression;
+
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.scan.expression.Expression;
+import org.apache.carbondata.core.scan.expression.ExpressionResult;
+import 
org.apache.carbondata.core.scan.expression.exception.FilterIllegalMemberException;
+import 
org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException;
+import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
+import org.apache.carbondata.core.scan.filter.intf.RowIntf;
+
+@InterfaceAudience.Internal
+public class MatchExpression extends Expression {
+  private String queryString;
+
+  public MatchExpression(String queryString) {
+this.queryString = queryString;
+  }
+
+  @Override
+  public ExpressionResult evaluate(RowIntf value)
+  throws FilterUnsupportedException, FilterIllegalMemberException {
+return new ExpressionResult(DataTypes.BOOLEAN,true);
+  }
+
+  @Override
+  public ExpressionType getFilterExpressionType() {
+return ExpressionType.TEXT_MATCH;
+  }
+
+  @Override
+  public void findAndSetChild(Expression oldExpr, Expression newExpr) {
+
+  }
+
+  @Override
+  public String getString() {
+

[08/20] carbondata git commit: [CARBONDATA-2189] Add DataMapProvider developer interface

2018-03-04 Thread jackylk
[CARBONDATA-2189] Add DataMapProvider developer interface

Add developer interface for 2 types of DataMap:

1.IndexDataMap: DataMap that leveraging index to accelerate filter query
2.MVDataMap: DataMap that leveraging Materialized View to accelerate olap style 
query, like SPJG query (select, predicate, join, groupby)
This PR adds support for following logic when creating and dropping the DataMap

This closes #1987


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f8ded96e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f8ded96e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f8ded96e

Branch: refs/heads/datamap-rebase1
Commit: f8ded96e659cb1e99cc6ac511d1db7cbc25dddb7
Parents: c7a9f15
Author: Jacky Li 
Authored: Thu Feb 22 20:59:59 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 22:31:26 2018 +0800

--
 .../sql/MalformedDataMapCommandException.java   |   4 +
 .../carbondata/core/datamap/DataMapChooser.java |   5 +-
 .../core/datamap/DataMapRegistry.java   |  37 +
 .../core/datamap/DataMapStoreManager.java   |  82 +-
 .../carbondata/core/datamap/TableDataMap.java   |  42 +-
 .../core/datamap/dev/AbstractDataMapWriter.java |   2 +-
 .../carbondata/core/datamap/dev/DataMap.java|  56 --
 .../core/datamap/dev/DataMapFactory.java|  86 --
 .../core/datamap/dev/IndexDataMap.java  |  56 ++
 .../core/datamap/dev/IndexDataMapFactory.java   |  86 ++
 .../cgdatamap/AbstractCoarseGrainDataMap.java   |  24 -
 .../AbstractCoarseGrainDataMapFactory.java  |  34 -
 .../AbstractCoarseGrainIndexDataMap.java|  24 +
 .../AbstractCoarseGrainIndexDataMapFactory.java |  34 +
 .../dev/expr/DataMapExprWrapperImpl.java|   2 +-
 .../dev/fgdatamap/AbstractFineGrainDataMap.java |  24 -
 .../AbstractFineGrainDataMapFactory.java|  38 -
 .../AbstractFineGrainIndexDataMap.java  |  24 +
 .../AbstractFineGrainIndexDataMapFactory.java   |  38 +
 .../indexstore/BlockletDataMapIndexStore.java   |  33 +-
 .../blockletindex/BlockletDataMap.java  | 971 ---
 .../blockletindex/BlockletDataMapFactory.java   | 285 --
 .../blockletindex/BlockletDataMapModel.java |   2 +-
 .../blockletindex/BlockletIndexDataMap.java | 971 +++
 .../BlockletIndexDataMapFactory.java| 285 ++
 .../core/metadata/schema/table/CarbonTable.java |  10 +-
 .../metadata/schema/table/DataMapSchema.java|   1 +
 .../schema/table/DataMapSchemaFactory.java  |  13 +-
 .../core/metadata/schema/table/TableSchema.java |   3 +-
 .../blockletindex/TestBlockletDataMap.java  |  66 --
 .../blockletindex/TestBlockletIndexDataMap.java |  59 ++
 .../datamap/examples/MinMaxDataMap.java | 152 ---
 .../datamap/examples/MinMaxDataMapFactory.java  | 117 ---
 .../datamap/examples/MinMaxIndexDataMap.java| 150 +++
 .../examples/MinMaxIndexDataMapFactory.java | 117 +++
 .../MinMaxDataMapExample.scala  |   4 +-
 docs/datamap-developer-guide.md |  16 +
 .../hadoop/api/CarbonTableInputFormat.java  |   5 +-
 .../preaggregate/TestPreAggCreateCommand.scala  |   2 +-
 .../TestPreAggregateTableSelection.scala|   4 +-
 .../timeseries/TestTimeSeriesCreateTable.scala  |   4 +-
 .../testsuite/datamap/CGDataMapTestCase.scala   | 381 
 .../datamap/CGIndexDataMapTestCase.scala| 383 
 .../testsuite/datamap/DataMapWriterSuite.scala  | 216 -
 .../testsuite/datamap/FGDataMapTestCase.scala   | 473 -
 .../datamap/FGIndexDataMapTestCase.scala| 472 +
 .../datamap/IndexDataMapWriterSuite.scala   | 217 +
 .../testsuite/datamap/TestDataMapCommand.scala  | 288 --
 .../datamap/TestIndexDataMapCommand.scala   | 285 ++
 .../carbondata/datamap/DataMapManager.java  |  53 +
 .../carbondata/datamap/DataMapProperty.java |  32 +
 .../carbondata/datamap/DataMapProvider.java | 105 ++
 .../datamap/IndexDataMapProvider.java   | 116 +++
 .../datamap/PreAggregateDataMapProvider.java|  92 ++
 .../datamap/TimeseriesDataMapProvider.java  |  50 +
 .../scala/org/apache/spark/sql/CarbonEnv.scala  |   2 +-
 .../datamap/CarbonCreateDataMapCommand.scala|  92 +-
 .../datamap/CarbonDropDataMapCommand.scala  |  73 +-
 .../CarbonAlterTableDropPartitionCommand.scala  |   2 +-
 .../CarbonAlterTableSplitPartitionCommand.scala |   2 +-
 .../CreatePreAggregateTableCommand.scala| 203 
 .../preaaggregate/PreAggregateTableHelper.scala | 195 
 .../preaaggregate/PreAggregateUtil.scala|  24 +-
 .../CarbonAlterTableAddColumnCommand.scala  |   2 +-
 .../CarbonAlterTableDataTypeChangeCommand.scala |   2 +-
 .../CarbonAlterTableDropColumnCommand.scala |   2 +-
 .../schema/CarbonAlterTableRenameCommand.scala  |   2 +-
 

[01/20] carbondata git commit: [CARBONDATA-1543] Supported DataMap chooser and expression for supporting multiple datamaps in single query

2018-03-04 Thread jackylk
Repository: carbondata
Updated Branches:
  refs/heads/datamap-rebase1 [created] f9139930e


http://git-wip-us.apache.org/repos/asf/carbondata/blob/c7a9f15e/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
index ccb9b68..b1962c1 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
@@ -19,15 +19,15 @@ package 
org.apache.carbondata.integration.spark.testsuite.preaggregate
 
 import scala.collection.JavaConverters._
 
-import org.apache.spark.sql.{AnalysisException, 
CarbonDatasourceHadoopRelation, Row}
+import org.apache.spark.sql.{CarbonDatasourceHadoopRelation, Row}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.datasources.LogicalRelation
 import org.apache.spark.sql.hive.CarbonRelation
 import org.apache.spark.sql.test.util.QueryTest
 import org.scalatest.BeforeAndAfterAll
 
-import 
org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
 import org.apache.carbondata.core.constants.CarbonCommonConstants
+import 
org.apache.carbondata.common.exceptions.sql.{MalformedCarbonCommandException, 
MalformedDataMapCommandException}
 import org.apache.carbondata.core.metadata.encoder.Encoding
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable
 import 
org.apache.carbondata.core.metadata.schema.datamap.DataMapProvider.TIMESERIES
@@ -293,7 +293,7 @@ class TestPreAggCreateCommand extends QueryTest with 
BeforeAndAfterAll {
   test("test pre agg create table 22: using invalid datamap provider") {
 sql("DROP DATAMAP IF EXISTS agg0 ON TABLE maintable")
 
-val e: Exception = intercept[MalformedDataMapCommandException] {
+val e = intercept[MalformedDataMapCommandException] {
   sql(
 """
   | CREATE DATAMAP agg0 ON TABLE mainTable

http://git-wip-us.apache.org/repos/asf/carbondata/blob/c7a9f15e/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/timeseries/TestTimeSeriesCreateTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/timeseries/TestTimeSeriesCreateTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/timeseries/TestTimeSeriesCreateTable.scala
index 43316b3..ec76b37 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/timeseries/TestTimeSeriesCreateTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/timeseries/TestTimeSeriesCreateTable.scala
@@ -201,7 +201,7 @@ class TestTimeSeriesCreateTable extends QueryTest with 
BeforeAndAfterAll {
   | GROUP BY dataTime
 """.stripMargin)
 }
-assert(e.getMessage.equals("Unknown datamap provider/class abc"))
+assert(e.getMessage.equals("DataMap class 'abc' not found"))
   }
 
   test("test timeseries create table 12: USING and catch 
MalformedCarbonCommandException") {
@@ -216,7 +216,7 @@ class TestTimeSeriesCreateTable extends QueryTest with 
BeforeAndAfterAll {
   | GROUP BY dataTime
 """.stripMargin)
 }
-assert(e.getMessage.equals("Unknown datamap provider/class abc"))
+assert(e.getMessage.equals("DataMap class 'abc' not found"))
   }
 
   test("test timeseries create table 13: Only one granularity level can be 
defined 1") {
@@ -237,6 +237,7 @@ class TestTimeSeriesCreateTable extends QueryTest with 
BeforeAndAfterAll {
| GROUP BY dataTime
""".stripMargin)
 }
+e.printStackTrace()
 assert(e.getMessage.equals("Only one granularity level can be defined"))
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/c7a9f15e/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CompactionSupportGlobalSortBigFileTest.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CompactionSupportGlobalSortBigFileTest.scala
 

[05/20] carbondata git commit: [CARBONDATA-2189] Add DataMapProvider developer interface

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f8ded96e/docs/datamap-developer-guide.md
--
diff --git a/docs/datamap-developer-guide.md b/docs/datamap-developer-guide.md
new file mode 100644
index 000..31afd34
--- /dev/null
+++ b/docs/datamap-developer-guide.md
@@ -0,0 +1,16 @@
+# DataMap Developer Guide
+
+### Introduction
+DataMap is a data structure that can be used to accelerate certain queries of 
the table. Different DataMaps can be implemented by developers. 
+Currently, there are 2 types of DataMap supported:
+1. IndexDataMap: DataMap that leverages an index to accelerate filter queries
+2. MVDataMap: DataMap that leverages a Materialized View to accelerate OLAP-
style queries, like SPJG queries (select, predicate, join, groupby)
+
+### DataMap provider
+When user issues `CREATE DATAMAP dm ON TABLE main USING 'provider'`, the 
corresponding DataMapProvider implementation will be created and initialized. 
+Currently, the provider string can be:
+1. preaggregate: one type of MVDataMap that does pre-aggregation on a single table
+2. timeseries: one type of MVDataMap that does pre-aggregation based on the time 
dimension of the table
+3. class name of an IndexDataMapFactory implementation: developers can implement a new 
type of IndexDataMap by extending IndexDataMapFactory
+
+When user issues `DROP DATAMAP dm ON TABLE main`, the corresponding 
DataMapProvider interface will be called.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f8ded96e/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
--
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
index 3bc4547..007ba2f 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
@@ -43,8 +43,7 @@ import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.exception.InvalidConfigurationException;
 import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
 import org.apache.carbondata.core.indexstore.PartitionSpec;
-import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap;
-import 
org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory;
+import 
org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexDataMapFactory;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
 import org.apache.carbondata.core.metadata.schema.PartitionInfo;
@@ -756,7 +755,7 @@ public class CarbonTableInputFormat extends 
FileInputFormat {
   DistributableDataMapFormat datamapDstr =
   new DistributableDataMapFormat(absoluteTableIdentifier, 
dataMapExprWrapper,
   segmentIds, partitionsToPrune,
-  BlockletDataMapFactory.class.getName());
+  BlockletIndexDataMapFactory.class.getName());
   prunedBlocklets = dataMapJob.execute(datamapDstr, resolver);
   // Apply expression on the blocklets.
   prunedBlocklets = dataMapExprWrapper.pruneBlocklets(prunedBlocklets);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f8ded96e/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
index b1962c1..f208c92 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
@@ -286,7 +286,7 @@ class TestPreAggCreateCommand extends QueryTest with 
BeforeAndAfterAll {
| GROUP BY dob,name
""".stripMargin)
 }
-assert(e.getMessage.contains(s"$timeSeries keyword missing"))
+assert(e.getMessage.contains("Only 'path' dmproperty is allowed for this 
datamap"))
 sql("DROP TABLE IF EXISTS maintabletime")
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f8ded96e/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggregateTableSelection.scala
--
diff --git 

[16/20] carbondata git commit: [CARBONDATA-2206] support lucene index datamap

2018-03-04 Thread jackylk
[CARBONDATA-2206] support lucene index datamap

This PR is an initial effort to integrate lucene as an index datamap into 
carbondata.
A new module called carbondata-lucene is added to support lucene datamap:

1.Add LuceneFineGrainDataMap, implement FineGrainDataMap interface.
2.Add LuceneCoarseGrainDataMap, implement CoarseGrainDataMap interface.
3.Support writing lucene index via LuceneDataMapWriter.
4.Implement LuceneDataMapFactory
5.A UDF called TEXT_MATCH is added, use it to do filtering on string column by 
lucene

This closes #2003


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c0133aac
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c0133aac
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c0133aac

Branch: refs/heads/datamap-rebase1
Commit: c0133aac80db1403754bea93a7654185b98ea9fb
Parents: 96ee82b
Author: Jacky Li 
Authored: Mon Feb 26 16:30:38 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 22:45:15 2018 +0800

--
 .../carbondata/core/datamap/DataMapChooser.java |   4 +
 .../core/datamap/DataMapStoreManager.java   |   5 +-
 .../carbondata/core/datamap/dev/DataMap.java|   2 +-
 .../core/datamap/dev/DataMapFactory.java|   2 +-
 .../core/datamap/dev/DataMapWriter.java |   7 +-
 .../cgdatamap/CoarseGrainDataMapFactory.java|   1 +
 .../core/scan/filter/intf/ExpressionType.java   |   3 +-
 datamap/lucene/pom.xml  | 149 +
 .../lucene/LuceneCoarseGrainDataMap.java| 232 +
 .../lucene/LuceneCoarseGrainDataMapFactory.java |  72 
 .../lucene/LuceneDataMapDistributable.java  |  36 ++
 .../lucene/LuceneDataMapFactoryBase.java| 180 ++
 .../datamap/lucene/LuceneDataMapWriter.java | 328 +++
 .../datamap/lucene/LuceneFineGrainDataMap.java  | 280 
 .../lucene/LuceneFineGrainDataMapFactory.java   |  68 
 .../lucene/LuceneCoarseGrainDataMapSuite.scala  |  73 +
 .../lucene/LuceneFineGrainDataMapSuite.scala|  98 ++
 integration/spark-common-test/pom.xml   |   6 +
 .../testsuite/datamap/FGDataMapTestCase.scala   |   2 +-
 .../carbondata/datamap/DataMapProvider.java |   4 +-
 .../datamap/IndexDataMapProvider.java   |   4 +-
 .../datamap/expression/MatchExpression.java |  56 
 .../carbondata/datamap/TextMatchUDF.scala   |  34 ++
 .../scala/org/apache/spark/sql/CarbonEnv.scala  |   5 +
 .../strategy/CarbonLateDecodeStrategy.scala |   9 +
 .../spark/sql/optimizer/CarbonFilters.scala |   4 +
 pom.xml |   3 +
 .../datamap/DataMapWriterListener.java  |   6 +-
 .../store/writer/AbstractFactDataWriter.java|  12 +-
 .../writer/v3/CarbonFactDataWriterImplV3.java   |   4 +-
 30 files changed, 1671 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/c0133aac/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
index 94b48c6..c8c971d 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
@@ -228,6 +228,10 @@ public class DataMapChooser {
 
   private boolean contains(DataMapMeta mapMeta, List 
columnExpressions,
   Set expressionTypes) {
+if (mapMeta.getOptimizedOperation().contains(ExpressionType.TEXT_MATCH)) {
+  // TODO: fix it with right logic
+  return true;
+}
 if (mapMeta.getIndexedColumns().size() == 0 || columnExpressions.size() == 
0) {
   return false;
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/c0133aac/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
index e57a841..ab339e8 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
@@ -16,6 +16,7 @@
  */
 package org.apache.carbondata.core.datamap;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -144,7 +145,7 @@ public final class DataMapStoreManager {
* The datamap is created using datamap name, datamap factory class and 
table 

[42/50] [abbrv] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/dcfe73b8/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
new file mode 100644
index 000..fbb93b6
--- /dev/null
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.processing.loading.model;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.carbondata.common.Maps;
+import org.apache.carbondata.common.Strings;
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import org.apache.carbondata.common.constants.LoggerAction;
+import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonProperties;
+import org.apache.carbondata.core.util.CarbonUtil;
+import 
org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants;
+import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat;
+import org.apache.carbondata.processing.loading.sort.SortScopeOptions;
+import org.apache.carbondata.processing.util.TableOptionConstant;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Builder for {@link CarbonLoadModel}
+ */
+@InterfaceAudience.Developer
+public class CarbonLoadModelBuilder {
+
+  private CarbonTable table;
+
+  public CarbonLoadModelBuilder(CarbonTable table) {
+this.table = table;
+  }
+
+  /**
+   * build CarbonLoadModel for data loading
+   * @param options Load options from user input
+   * @return a new CarbonLoadModel instance
+   */
+  public CarbonLoadModel build(
+  Map options) throws InvalidLoadOptionException, 
IOException {
+Map optionsFinal = 
LoadOption.fillOptionWithDefaultValue(options);
+optionsFinal.put("sort_scope", "no_sort");
+if (!options.containsKey("fileheader")) {
+  List csvHeader = 
table.getCreateOrderColumn(table.getTableName());
+  String[] columns = new String[csvHeader.size()];
+  for (int i = 0; i < columns.length; i++) {
+columns[i] = csvHeader.get(i).getColName();
+  }
+  optionsFinal.put("fileheader", Strings.mkString(columns, ","));
+}
+CarbonLoadModel model = new CarbonLoadModel();
+
+// we have provided 'fileheader', so it hadoopConf can be null
+build(options, optionsFinal, model, null);
+
+// set default values
+
model.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
+model.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
+model.setUseOnePass(Boolean.parseBoolean(Maps.getOrDefault(options, 
"onepass", "false")));
+model.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost", 
null));
+try {
+  
model.setDictionaryServerPort(Integer.parseInt(Maps.getOrDefault(options, 
"dictport", "-1")));
+} catch (NumberFormatException e) {
+  throw new InvalidLoadOptionException(e.getMessage());
+}
+return model;
+  }
+
+  /**
+   * build CarbonLoadModel for data loading
+   * @param options Load options from user input
+   * @param optionsFinal Load options that populated with default values for 
optional options
+   * @param carbonLoadModel The output load model
+   * @param hadoopConf hadoopConf is needed to read CSV header if there 
'fileheader' is not set in
+   *   user provided load options
+   */
+  public void build(
+  Map options,
+  Map optionsFinal,
+  CarbonLoadModel 

[47/50] [abbrv] carbondata git commit: [REBASE] Solve conflict after merging master

2018-03-04 Thread jackylk
[REBASE] Solve conflict after merging master


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8104735f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8104735f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8104735f

Branch: refs/heads/carbonstore-rebase5
Commit: 8104735fd66952a531153eb0d3b4db5c9ecc133d
Parents: ce88eb6
Author: Jacky Li 
Authored: Tue Feb 27 11:26:30 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:14 2018 +0800

--
 .../carbondata/core/datamap/dev/DataMap.java|   6 -
 .../core/datamap/dev/DataMapFactory.java|   2 +-
 .../exception/ConcurrentOperationException.java |  16 +-
 .../core/metadata/PartitionMapFileStore.java|   0
 .../statusmanager/SegmentStatusManager.java |  10 +-
 .../SegmentUpdateStatusManager.java |   1 -
 datamap/examples/pom.xml| 145 +++--
 .../datamap/examples/MinMaxDataWriter.java  |   1 -
 examples/flink/pom.xml  |   4 +-
 .../carbondata/examples/FlinkExample.scala  |  10 +-
 .../CarbonStreamSparkStreamingExample.scala |   1 -
 .../hadoop/api/CarbonTableInputFormat.java  |   5 +-
 .../TestInsertAndOtherCommandConcurrent.scala   |   2 +-
 .../StandardPartitionGlobalSortTestCase.scala   |   2 +-
 .../exception/ProcessMetaDataException.java |   2 +
 .../org/apache/carbondata/api/CarbonStore.scala |   6 +-
 .../carbondata/spark/load/CsvRDDHelper.scala| 157 +++
 .../load/DataLoadProcessBuilderOnSpark.scala|   3 +-
 .../carbondata/spark/util/CarbonScalaUtil.scala |   2 +-
 .../carbondata/spark/util/CommonUtil.scala  |   2 -
 .../command/carbonTableSchemaCommon.scala   |   6 +-
 .../CarbonAlterTableCompactionCommand.scala |   3 +-
 .../management/CarbonCleanFilesCommand.scala|   2 +-
 .../CarbonDeleteLoadByIdCommand.scala   |   2 +-
 .../CarbonDeleteLoadByLoadDateCommand.scala |   2 +-
 .../management/CarbonLoadDataCommand.scala  |  28 ++--
 .../CarbonProjectForDeleteCommand.scala |   2 +-
 .../CarbonProjectForUpdateCommand.scala |   2 +-
 .../schema/CarbonAlterTableRenameCommand.scala  |   2 +-
 .../command/table/CarbonDropTableCommand.scala  |   2 +-
 .../datasources/CarbonFileFormat.scala  |   3 -
 .../vectorreader/AddColumnTestCases.scala   |   1 +
 .../datamap/DataMapWriterListener.java  |   3 +-
 .../loading/model/CarbonLoadModelBuilder.java   |  34 +++-
 .../processing/loading/model/LoadOption.java|  15 +-
 .../processing/merger/CarbonDataMergerUtil.java |   3 +-
 .../util/CarbonDataProcessorUtil.java   |   3 +-
 .../processing/util/CarbonLoaderUtil.java   |   8 +
 store/sdk/pom.xml   |   2 +-
 .../carbondata/sdk/file/CSVCarbonWriter.java|   8 +-
 40 files changed, 336 insertions(+), 172 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8104735f/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java
index 02db8af..dd5507c 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java
@@ -38,9 +38,6 @@ public interface DataMap {
   /**
* Prune the datamap with filter expression and partition information. It 
returns the list of
* blocklets where these filters can exist.
-   *
-   * @param filterExp
-   * @return
*/
   List prune(FilterResolverIntf filterExp, SegmentProperties 
segmentProperties,
   List partitions);
@@ -48,9 +45,6 @@ public interface DataMap {
   // TODO Move this method to Abstract class
   /**
* Validate whether the current segment needs to be fetching the required 
data
-   *
-   * @param filterExp
-   * @return
*/
   boolean isScanRequired(FilterResolverIntf filterExp);
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8104735f/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
index 50ac279..d8a467f 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
@@ -21,8 +21,8 @@ import java.util.List;
 
 import 

[15/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
index 0cb2918..099fffd 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/SegmentInfo.java
@@ -29,31 +29,12 @@ public class SegmentInfo implements Serializable {
   private static final long serialVersionUID = -174987462709431L;
 
   /**
-   * number of column in the segment
-   */
-  private int numberOfColumns;
-
-  /**
* cardinality of each columns
* column which is not participating in the multidimensional key cardinality 
will be -1;
*/
   private int[] columnCardinality;
 
   /**
-   * @return the numberOfColumns
-   */
-  public int getNumberOfColumns() {
-return numberOfColumns;
-  }
-
-  /**
-   * @param numberOfColumns the numberOfColumns to set
-   */
-  public void setNumberOfColumns(int numberOfColumns) {
-this.numberOfColumns = numberOfColumns;
-  }
-
-  /**
* @return the columnCardinality
*/
   public int[] getColumnCardinality() {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index 6036569..d17d865 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -19,7 +19,13 @@ package org.apache.carbondata.core.metadata.schema.table;
 
 import java.io.IOException;
 import java.io.Serializable;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
@@ -33,7 +39,10 @@ import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
 import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonImplicitDimension;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
 import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
+import org.apache.carbondata.core.scan.model.QueryModel;
+import org.apache.carbondata.core.scan.model.QueryProjection;
 import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeConverter;
 import org.apache.carbondata.core.util.DataTypeUtil;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 
@@ -136,10 +145,7 @@ public class CarbonTable implements Serializable {
   /**
* During creation of TableInfo from hivemetastore the DataMapSchemas and 
the columns
* DataTypes are not converted to the appropriate child classes.
-   *
* This method will cast the same to the appropriate classes
-   *
-   * @param tableInfo
*/
   public static void updateTableInfo(TableInfo tableInfo) {
 List dataMapSchemas = new ArrayList<>();
@@ -153,8 +159,9 @@ public class CarbonTable implements Serializable {
 }
 tableInfo.setDataMapSchemaList(dataMapSchemas);
 for (ColumnSchema columnSchema : 
tableInfo.getFactTable().getListOfColumns()) {
-  columnSchema.setDataType(DataTypeUtil.valueOf(columnSchema.getDataType(),
-  columnSchema.getPrecision(), columnSchema.getScale()));
+  columnSchema.setDataType(
+  DataTypeUtil.valueOf(
+  columnSchema.getDataType(), columnSchema.getPrecision(), 
columnSchema.getScale()));
 }
 List childSchema = tableInfo.getDataMapSchemaList();
 for (DataMapSchema dataMapSchema : childSchema) {
@@ -168,10 +175,11 @@ public class CarbonTable implements Serializable {
   }
 }
 if (tableInfo.getFactTable().getBucketingInfo() != null) {
-  for (ColumnSchema columnSchema : tableInfo.getFactTable()
-  .getBucketingInfo().getListOfColumns()) {
-
columnSchema.setDataType(DataTypeUtil.valueOf(columnSchema.getDataType(),
-columnSchema.getPrecision(), columnSchema.getScale()));
+  for (ColumnSchema columnSchema :
+  tableInfo.getFactTable().getBucketingInfo().getListOfColumns()) {
+columnSchema.setDataType(
+DataTypeUtil.valueOf(
+columnSchema.getDataType(), columnSchema.getPrecision(), 

carbondata git commit: [maven-release-plugin] prepare for next development iteration

2018-03-04 Thread ravipesala
Repository: carbondata
Updated Branches:
  refs/heads/branch-1.3 744032d3c -> ce9695633


[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ce969563
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ce969563
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ce969563

Branch: refs/heads/branch-1.3
Commit: ce9695633a3780f7e72b0413e58db19203d5dc3f
Parents: 744032d
Author: ravipesala 
Authored: Sun Mar 4 18:55:40 2018 +0530
Committer: ravipesala 
Committed: Sun Mar 4 18:55:40 2018 +0530

--
 assembly/pom.xml  | 2 +-
 common/pom.xml| 2 +-
 core/pom.xml  | 2 +-
 examples/spark2/pom.xml   | 2 +-
 format/pom.xml| 2 +-
 hadoop/pom.xml| 2 +-
 integration/hive/pom.xml  | 2 +-
 integration/presto/pom.xml| 2 +-
 integration/spark-common-test/pom.xml | 2 +-
 integration/spark-common/pom.xml  | 2 +-
 integration/spark2/pom.xml| 2 +-
 pom.xml   | 4 ++--
 processing/pom.xml| 2 +-
 streaming/pom.xml | 2 +-
 14 files changed, 15 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce969563/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2e07694..30ebd8c 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1
+1.3.2-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce969563/common/pom.xml
--
diff --git a/common/pom.xml b/common/pom.xml
index dc55ae4..91cfcd1 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1
+1.3.2-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce969563/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 365c388..e86e35d 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1
+1.3.2-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce969563/examples/spark2/pom.xml
--
diff --git a/examples/spark2/pom.xml b/examples/spark2/pom.xml
index 34f3ef4..49f2135 100644
--- a/examples/spark2/pom.xml
+++ b/examples/spark2/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1
+1.3.2-SNAPSHOT
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce969563/format/pom.xml
--
diff --git a/format/pom.xml b/format/pom.xml
index fd71440..0013701 100644
--- a/format/pom.xml
+++ b/format/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1
+1.3.2-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce969563/hadoop/pom.xml
--
diff --git a/hadoop/pom.xml b/hadoop/pom.xml
index 59c136e..fcae681 100644
--- a/hadoop/pom.xml
+++ b/hadoop/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1
+1.3.2-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce969563/integration/hive/pom.xml
--
diff --git a/integration/hive/pom.xml b/integration/hive/pom.xml
index cc8fe5a..6da68a6 100644
--- a/integration/hive/pom.xml
+++ b/integration/hive/pom.xml
@@ -22,7 +22,7 @@
 
 org.apache.carbondata
 carbondata-parent
-1.3.1
+1.3.2-SNAPSHOT
 ../../pom.xml
 
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce969563/integration/presto/pom.xml
--
diff --git a/integration/presto/pom.xml b/integration/presto/pom.xml
index 6022ed1..00a397f 100644
--- a/integration/presto/pom.xml
+++ b/integration/presto/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1
+1.3.2-SNAPSHOT
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ce969563/integration/spark-common-test/pom.xml

[carbondata] Git Push Summary

2018-03-04 Thread ravipesala
Repository: carbondata
Updated Tags:  refs/tags/apache-carbondata-1.3.1-rc1 [created] a1f6cc4c5


carbondata git commit: [maven-release-plugin] prepare release apache-carbondata-1.3.1-rc1

2018-03-04 Thread ravipesala
Repository: carbondata
Updated Branches:
  refs/heads/branch-1.3 362513a68 -> 744032d3c


[maven-release-plugin] prepare release apache-carbondata-1.3.1-rc1


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/744032d3
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/744032d3
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/744032d3

Branch: refs/heads/branch-1.3
Commit: 744032d3cc39ff009b4a24b2b43f6d9457f439f4
Parents: 362513a
Author: ravipesala 
Authored: Sun Mar 4 18:10:28 2018 +0530
Committer: ravipesala 
Committed: Sun Mar 4 18:10:28 2018 +0530

--
 assembly/pom.xml  | 2 +-
 common/pom.xml| 2 +-
 core/pom.xml  | 2 +-
 examples/spark2/pom.xml   | 2 +-
 format/pom.xml| 2 +-
 hadoop/pom.xml| 2 +-
 integration/hive/pom.xml  | 2 +-
 integration/presto/pom.xml| 2 +-
 integration/spark-common-test/pom.xml | 2 +-
 integration/spark-common/pom.xml  | 2 +-
 integration/spark2/pom.xml| 2 +-
 pom.xml   | 4 ++--
 processing/pom.xml| 2 +-
 streaming/pom.xml | 2 +-
 14 files changed, 15 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/744032d3/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index c4d6222..2e07694 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1-SNAPSHOT
+1.3.1
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/744032d3/common/pom.xml
--
diff --git a/common/pom.xml b/common/pom.xml
index b136141..dc55ae4 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1-SNAPSHOT
+1.3.1
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/744032d3/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index d5a1c0b..365c388 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1-SNAPSHOT
+1.3.1
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/744032d3/examples/spark2/pom.xml
--
diff --git a/examples/spark2/pom.xml b/examples/spark2/pom.xml
index 3d2260f..34f3ef4 100644
--- a/examples/spark2/pom.xml
+++ b/examples/spark2/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1-SNAPSHOT
+1.3.1
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/744032d3/format/pom.xml
--
diff --git a/format/pom.xml b/format/pom.xml
index ddc8027..fd71440 100644
--- a/format/pom.xml
+++ b/format/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1-SNAPSHOT
+1.3.1
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/744032d3/hadoop/pom.xml
--
diff --git a/hadoop/pom.xml b/hadoop/pom.xml
index bac5785..59c136e 100644
--- a/hadoop/pom.xml
+++ b/hadoop/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1-SNAPSHOT
+1.3.1
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/744032d3/integration/hive/pom.xml
--
diff --git a/integration/hive/pom.xml b/integration/hive/pom.xml
index 8c38b74..cc8fe5a 100644
--- a/integration/hive/pom.xml
+++ b/integration/hive/pom.xml
@@ -22,7 +22,7 @@
 
 org.apache.carbondata
 carbondata-parent
-1.3.1-SNAPSHOT
+1.3.1
 ../../pom.xml
 
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/744032d3/integration/presto/pom.xml
--
diff --git a/integration/presto/pom.xml b/integration/presto/pom.xml
index a4c482d..6022ed1 100644
--- a/integration/presto/pom.xml
+++ b/integration/presto/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.carbondata
 carbondata-parent
-1.3.1-SNAPSHOT
+1.3.1
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/744032d3/integration/spark-common-test/pom.xml

[39/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row"

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/46031a32/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
index 527452a..11b3d43 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
@@ -31,14 +31,15 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.processing.loading.row.IntermediateSortTempRow;
-import org.apache.carbondata.processing.loading.sort.SortStepRowHandler;
+import org.apache.carbondata.core.util.DataTypeUtil;
+import 
org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage;
 import 
org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException;
-import 
org.apache.carbondata.processing.sort.sortdata.IntermediateSortTempRowComparator;
+import org.apache.carbondata.processing.sort.sortdata.NewRowComparator;
 import org.apache.carbondata.processing.sort.sortdata.SortParameters;
-import org.apache.carbondata.processing.sort.sortdata.TableFieldStat;
 
 public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
 
@@ -62,15 +63,21 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
* entry count
*/
   private int entryCount;
+
   /**
* return row
*/
-  private IntermediateSortTempRow returnRow;
+  private Object[] returnRow;
+  private int dimCnt;
+  private int complexCnt;
+  private int measureCnt;
+  private boolean[] isNoDictionaryDimensionColumn;
+  private DataType[] measureDataTypes;
   private int readBufferSize;
   private String compressorName;
-  private IntermediateSortTempRow[] currentBuffer;
+  private Object[][] currentBuffer;
 
-  private IntermediateSortTempRow[] backupBuffer;
+  private Object[][] backupBuffer;
 
   private boolean isBackupFilled;
 
@@ -93,21 +100,27 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
 
   private int numberOfObjectRead;
 
-  private TableFieldStat tableFieldStat;
-  private SortStepRowHandler sortStepRowHandler;
-  private Comparator comparator;
+  private int nullSetWordsLength;
+
+  private Comparator comparator;
+
   /**
* Constructor to initialize
*/
   public UnsafeSortTempFileChunkHolder(File tempFile, SortParameters 
parameters) {
 // set temp file
 this.tempFile = tempFile;
+this.dimCnt = parameters.getDimColCount();
+this.complexCnt = parameters.getComplexDimColCount();
+this.measureCnt = parameters.getMeasureColCount();
+this.isNoDictionaryDimensionColumn = 
parameters.getNoDictionaryDimnesionColumn();
+this.measureDataTypes = parameters.getMeasureDataType();
 this.readBufferSize = parameters.getBufferSize();
 this.compressorName = parameters.getSortTempCompressorName();
-this.tableFieldStat = new TableFieldStat(parameters);
-this.sortStepRowHandler = new SortStepRowHandler(tableFieldStat);
+
 this.executorService = Executors.newFixedThreadPool(1);
-comparator = new 
IntermediateSortTempRowComparator(parameters.getNoDictionarySortColumn());
+this.nullSetWordsLength = ((parameters.getMeasureColCount() - 1) >> 6) + 1;
+comparator = new NewRowComparator(parameters.getNoDictionarySortColumn());
 initialize();
   }
 
@@ -156,17 +169,11 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
*
* @throws CarbonSortKeyAndGroupByException problem while reading
*/
-  @Override
   public void readRow() throws CarbonSortKeyAndGroupByException {
 if (prefetch) {
   fillDataForPrefetch();
 } else {
-  try {
-this.returnRow = 
sortStepRowHandler.readIntermediateSortTempRowFromInputStream(stream);
-this.numberOfObjectRead++;
-  } catch (IOException e) {
-throw new CarbonSortKeyAndGroupByException("Problems while reading 
row", e);
-  }
+  this.returnRow = getRowFromStream();
 }
   }
 
@@ -200,22 +207,63 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
   }
 
   /**

[37/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-03-04 Thread jackylk
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

Pick up the no-sort fields in the row and pack them as bytes array and skip 
parsing them during merge sort to reduce CPU consumption

This closes #1792


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/28b5720f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/28b5720f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/28b5720f

Branch: refs/heads/carbonstore-rebase5
Commit: 28b5720fcf1cbd0d4bdf3f04e7b0edd8f9492a8d
Parents: dcfe73b
Author: xuchuanyin 
Authored: Thu Feb 8 14:35:14 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 +--
 .../presto/util/CarbonDataStoreCreator.scala|   1 -
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 +
 .../loading/sort/SortStepRowHandler.java| 466 +++
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++---
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 +--
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 ---
 .../UnsafeRowComparatorForNormalDims.java   |  59 +++
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 ++
 .../merger/UnsafeIntermediateFileMerger.java| 118 +
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 -
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +---
 .../IntermediateSortTempRowComparator.java  |  73 +++
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 ---
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +---
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 ++-
 .../sort/sortdata/TableFieldStat.java   | 176 +++
 28 files changed, 1186 insertions(+), 1294 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/28b5720f/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index d6ecfbc..fca1244 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,18 +82,26 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required Dimension from obj []
+   * Method to get the required dictionary Dimension from obj []
*
* @param index
* @param row
* @return
*/
-  public static Integer getDimension(int index, Object[] row) {
-
-Integer[] dimensions = (Integer[]) 
row[WriteStepRowUtil.DICTIONARY_DIMENSION];
-
+  public static int getDictDimension(int index, Object[] row) {
+int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
 return dimensions[index];
+  }
 
+  /**
+   * Method to get the required non-dictionary & complex from 3-parted row
+   * @param index
+   * @param row
+   * @return
+   */
+  public static byte[] getNoDictOrComplex(int index, Object[] row) {
+byte[][] nonDictArray = (byte[][]) 
row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+return nonDictArray[index];
   }
 
   /**
@@ -108,60 +116,11 @@ public class NonDictionaryUtil {
 return measures[index];
   }
 
-  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
-
-return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-  }
-
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] 
byteBufferArr,
   Object[] measureArray) {
-
 out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
 out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
 out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
-
-  /**
-   * This method will extract the single dimension from the complete high card 
dims byte[].+ *
-   * The format of the byte [] will be,  Totallength,CompleteStartOffsets,Dat
-   *
-   * @param highCardArr
-   * @param index
-   * @param highCardinalityCount
-   

[23/50] [abbrv] carbondata git commit: [CARBONDATA-2025] Unify all path construction through CarbonTablePath static method

2018-03-04 Thread jackylk
[CARBONDATA-2025] Unify all path construction through CarbonTablePath static 
method

Refactory CarbonTablePath:

1.Remove CarbonStorePath and use CarbonTablePath only.
2.Make CarbonTablePath an utility without object creation, it can avoid 
creating object before using it, thus code is cleaner and GC is less.

This closes #1768


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f06824e9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f06824e9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f06824e9

Branch: refs/heads/carbonstore-rebase5
Commit: f06824e9744a831776b1203c94d4001eef870b14
Parents: 92c9f22
Author: Jacky Li 
Authored: Wed Jan 31 16:14:27 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:00 2018 +0800

--
 .../DictionaryColumnUniqueIdentifier.java   |  29 +-
 .../dictionary/ManageDictionaryAndBTree.java|  13 +-
 .../core/metadata/AbsoluteTableIdentifier.java  |   4 +-
 .../core/metadata/SegmentFileStore.java |   3 +-
 .../core/metadata/schema/table/CarbonTable.java |  11 +-
 .../core/mutate/CarbonUpdateUtil.java   |  58 ++--
 .../core/scan/executor/util/QueryUtil.java  |   7 +-
 .../scan/executor/util/RestructureUtil.java |   6 +-
 .../scan/filter/FilterExpressionProcessor.java  |   2 +-
 .../filter/executer/FalseFilterExecutor.java|   8 +-
 .../RowLevelRangeGrtThanFiterExecuterImpl.java  |   1 +
 ...elRangeGrtrThanEquaToFilterExecuterImpl.java |   1 +
 ...velRangeLessThanEqualFilterExecuterImpl.java |   1 +
 .../RowLevelRangeLessThanFiterExecuterImpl.java |   1 +
 .../FalseConditionalResolverImpl.java   |   4 +-
 .../core/service/CarbonCommonFactory.java   |  16 -
 .../carbondata/core/service/PathService.java|  35 ---
 .../core/service/impl/PathFactory.java  |  50 
 .../statusmanager/SegmentStatusManager.java |  31 +-
 .../SegmentUpdateStatusManager.java |  70 ++---
 .../apache/carbondata/core/util/CarbonUtil.java |  98 ++
 .../util/path/CarbonSharedDictionaryPath.java   |  71 -
 .../core/util/path/CarbonStorePath.java |  71 -
 .../core/util/path/CarbonTablePath.java | 298 ++-
 .../dictionary/AbstractDictionaryCacheTest.java |  11 +-
 .../dictionary/ForwardDictionaryCacheTest.java  |   6 +-
 .../dictionary/ReverseDictionaryCacheTest.java  |   6 +-
 .../reader/CarbonDictionaryReaderImplTest.java  |   8 -
 .../CarbonFormatDirectoryStructureTest.java |  18 +-
 .../path/CarbonFormatSharedDictionaryTest.java  |  44 ---
 .../writer/CarbonDictionaryWriterImplTest.java  |  19 +-
 .../CarbonBatchSparkStreamingExample.scala  |   9 +-
 .../CarbonStreamSparkStreamingExample.scala |  10 +-
 .../CarbonStructuredStreamingExample.scala  |  11 +-
 ...CarbonStructuredStreamingWithRowParser.scala |   9 +-
 .../hadoop/api/CarbonTableInputFormat.java  |   8 +-
 .../streaming/CarbonStreamRecordWriter.java |   6 +-
 .../carbondata/hadoop/util/SchemaReader.java|  18 +-
 .../hadoop/test/util/StoreCreator.java  |  16 +-
 .../presto/CarbondataRecordSetProvider.java |   7 +-
 .../presto/impl/CarbonTableCacheModel.java  |  13 +-
 .../presto/impl/CarbonTableReader.java  |  49 ++-
 .../presto/util/CarbonDataStoreCreator.scala|  10 +-
 .../sdv/generated/MergeIndexTestCase.scala  |  11 +-
 .../dataload/TestLoadDataGeneral.scala  |  11 +-
 .../InsertIntoCarbonTableTestCase.scala |   8 +-
 .../createTable/TestCreateTableAsSelect.scala   |   2 +-
 .../datacompaction/DataCompactionLockTest.scala |   6 +-
 .../MajorCompactionIgnoreInMinorTest.scala  |  12 +-
 .../dataload/TestBatchSortDataLoad.scala|   5 +-
 .../dataload/TestDataLoadWithFileName.scala |   5 +-
 .../dataload/TestGlobalSortDataLoad.scala   |   8 +-
 .../dataretention/DataRetentionTestCase.scala   |   6 +-
 .../TestDataLoadingForPartitionTable.scala  |   4 +-
 .../StandardPartitionTableLoadingTestCase.scala |   6 +-
 .../org/apache/carbondata/api/CarbonStore.scala |   4 +-
 .../carbondata/spark/CarbonSparkFactory.scala   |   2 +-
 .../spark/DictionaryDetailHelper.scala  |   9 +-
 .../spark/rdd/AlterTableAddColumnRDD.scala  |  17 +-
 .../spark/rdd/CarbonGlobalDictionaryRDD.scala   |   6 -
 .../carbondata/spark/util/CommonUtil.scala  |  30 +-
 .../carbondata/spark/util/DataLoadingUtil.scala |  10 +-
 .../spark/util/GlobalDictionaryUtil.scala   |  16 +-
 .../command/carbonTableSchemaCommon.scala   |   9 +-
 .../org/apache/spark/util/PartitionUtils.scala  |   6 +-
 .../spark/rdd/AggregateDataMapCompactor.scala   |  19 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  14 +-
 .../spark/rdd/CarbonTableCompactor.scala|   8 +-
 .../CarbonAlterTableCompactionCommand.scala |  13 +-
 

[50/50] [abbrv] carbondata git commit: Support generating assembling JAR for store-sdk module

2018-03-04 Thread jackylk
Support generating assembling JAR for store-sdk module

Support generating assembling JAR for store-sdk module and remove junit 
dependency

This closes #1976


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/503e0d96
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/503e0d96
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/503e0d96

Branch: refs/heads/carbonstore-rebase5
Commit: 503e0d96864173ccfb29e49686f0af3f7edd779f
Parents: 8fe8ab4
Author: Jacky Li 
Authored: Tue Feb 13 09:12:09 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:14 2018 +0800

--
 common/pom.xml|  2 +
 core/pom.xml  |  2 +
 hadoop/pom.xml|  1 +
 integration/presto/pom.xml|  3 +-
 integration/spark-common-cluster-test/pom.xml |  2 +-
 integration/spark-common-test/pom.xml |  3 +-
 integration/spark-common/pom.xml  |  2 +-
 integration/spark2/pom.xml|  2 +-
 pom.xml   |  5 +++
 processing/pom.xml|  1 +
 store/sdk/pom.xml | 50 +-
 streaming/pom.xml |  1 -
 12 files changed, 66 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/common/pom.xml
--
diff --git a/common/pom.xml b/common/pom.xml
index 5550129..433d575 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -42,10 +42,12 @@
 
   junit
   junit
+  test
 
 
   org.jmockit
   jmockit
+  test
 
 
   org.apache.hadoop

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 92c9607..824de0d 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -70,10 +70,12 @@
 
   org.jmockit
   jmockit
+  test
 
 
   junit
   junit
+  test
 
 
   org.apache.spark

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/hadoop/pom.xml
--
diff --git a/hadoop/pom.xml b/hadoop/pom.xml
index 2aaac99..c3964c5 100644
--- a/hadoop/pom.xml
+++ b/hadoop/pom.xml
@@ -42,6 +42,7 @@
 
   junit
   junit
+  test
 
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/integration/presto/pom.xml
--
diff --git a/integration/presto/pom.xml b/integration/presto/pom.xml
index aaaf175..0abcf38 100644
--- a/integration/presto/pom.xml
+++ b/integration/presto/pom.xml
@@ -193,7 +193,7 @@
 
 
   org.scalatest
-  scalatest_2.11
+  scalatest_${scala.binary.version}
 
 
   org.apache.zookeeper
@@ -330,7 +330,6 @@
 
   org.scalatest
   scalatest_${scala.binary.version}
-  2.2.1
   test
 
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/integration/spark-common-cluster-test/pom.xml
--
diff --git a/integration/spark-common-cluster-test/pom.xml 
b/integration/spark-common-cluster-test/pom.xml
index fd907a3..028da11 100644
--- a/integration/spark-common-cluster-test/pom.xml
+++ b/integration/spark-common-cluster-test/pom.xml
@@ -49,11 +49,11 @@
 
   junit
   junit
+  test
 
 
   org.scalatest
   scalatest_${scala.binary.version}
-  2.2.1
   test
 
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/integration/spark-common-test/pom.xml
--
diff --git a/integration/spark-common-test/pom.xml 
b/integration/spark-common-test/pom.xml
index 67a2317..d1c04ae 100644
--- a/integration/spark-common-test/pom.xml
+++ b/integration/spark-common-test/pom.xml
@@ -106,16 +106,17 @@
 
   junit
   junit
+  test
 
 
   org.scalatest
   scalatest_${scala.binary.version}
-  2.2.1
   test
 
 
   org.jmockit
   jmockit
+  test
 
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/503e0d96/integration/spark-common/pom.xml
--
diff --git a/integration/spark-common/pom.xml b/integration/spark-common/pom.xml
index 295d62b..16f327d 100644
--- a/integration/spark-common/pom.xml
+++ b/integration/spark-common/pom.xml
@@ -58,11 +58,11 @@
   

[41/50] [abbrv] carbondata git commit: [CARBONDATA-1997] Add CarbonWriter SDK API

2018-03-04 Thread jackylk
[CARBONDATA-1997] Add CarbonWriter SDK API

Added a new module called store-sdk, and added a CarbonWriter API, it can be 
used to write Carbondata files to a specified folder, without Spark and Hadoop 
dependency. User can use this API in any environment.

This closes #1967


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5fccdabf
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5fccdabf
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5fccdabf

Branch: refs/heads/carbonstore-rebase5
Commit: 5fccdabfc1cc4656d75e51867dcfcb250c505c91
Parents: fc31be7
Author: Jacky Li 
Authored: Sat Feb 10 19:44:23 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../org/apache/carbondata/common/Strings.java   |  40 
 .../apache/carbondata/common/StringsSuite.java  |  53 +
 .../core/metadata/schema/table/CarbonTable.java |   7 +
 .../schema/table/CarbonTableBuilder.java|  72 +++
 .../core/metadata/schema/table/TableSchema.java |   7 +
 .../schema/table/TableSchemaBuilder.java| 107 ++
 .../schema/table/CarbonTableBuilderSuite.java   |  86 
 .../metadata/schema/table/CarbonTableTest.java  |  12 +-
 .../schema/table/TableSchemaBuilderSuite.java   |  56 ++
 .../carbondata/spark/util/DataLoadingUtil.scala |  45 +
 pom.xml |   7 +
 store/sdk/pom.xml   | 130 +
 .../carbondata/sdk/file/CSVCarbonWriter.java|  89 +
 .../carbondata/sdk/file/CarbonWriter.java   |  51 +
 .../sdk/file/CarbonWriterBuilder.java   | 194 +++
 .../org/apache/carbondata/sdk/file/Field.java   |  74 +++
 .../org/apache/carbondata/sdk/file/Schema.java  |  74 +++
 .../sdk/file/CSVCarbonWriterSuite.java  | 127 
 18 files changed, 1225 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/5fccdabf/common/src/main/java/org/apache/carbondata/common/Strings.java
--
diff --git a/common/src/main/java/org/apache/carbondata/common/Strings.java 
b/common/src/main/java/org/apache/carbondata/common/Strings.java
new file mode 100644
index 000..23288dd
--- /dev/null
+++ b/common/src/main/java/org/apache/carbondata/common/Strings.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.common;
+
+import java.util.Objects;
+
+public class Strings {
+
+  /**
+   * Provide same function as mkString in Scala.
+   * This is added to avoid JDK 8 dependency.
+   */
+  public static String mkString(String[] strings, String delimeter) {
+Objects.requireNonNull(strings);
+Objects.requireNonNull(delimeter);
+StringBuilder builder = new StringBuilder();
+for (int i = 0; i < strings.length; i++) {
+  builder.append(strings[i]);
+  if (i != strings.length - 1) {
+builder.append(delimeter);
+  }
+}
+return builder.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/5fccdabf/common/src/test/java/org/apache/carbondata/common/StringsSuite.java
--
diff --git 
a/common/src/test/java/org/apache/carbondata/common/StringsSuite.java 
b/common/src/test/java/org/apache/carbondata/common/StringsSuite.java
new file mode 100644
index 000..65da32b
--- /dev/null
+++ b/common/src/test/java/org/apache/carbondata/common/StringsSuite.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the 

[44/50] [abbrv] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-03-04 Thread jackylk
[CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

To make assembling JAR of store-sdk module, it should not depend on 
carbon-spark module

This closes #1970


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/dcfe73b8
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/dcfe73b8
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/dcfe73b8

Branch: refs/heads/carbonstore-rebase5
Commit: dcfe73b8b07267369f8c58130f27b75efccb4ee1
Parents: 5fccdab
Author: Jacky Li 
Authored: Sun Feb 11 21:37:04 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../java/org/apache/carbondata/common/Maps.java |  39 ++
 .../org/apache/carbondata/common/Strings.java   |   3 +
 .../exceptions/TableStatusLockException.java|  34 ++
 .../sql/InvalidLoadOptionException.java |  33 +
 .../sql/MalformedCarbonCommandException.java|  75 +++
 .../sql/MalformedDataMapCommandException.java   |  37 ++
 .../exceptions/sql/NoSuchDataMapException.java  |  39 ++
 .../carbondata/core/datamap/TableDataMap.java   |   5 +-
 .../exception/ConcurrentOperationException.java |  50 ++
 .../statusmanager/SegmentStatusManager.java | 124 
 .../carbondata/core/util/DeleteLoadFolders.java | 210 +++
 .../preaggregate/TestPreAggCreateCommand.scala  |   2 +-
 .../preaggregate/TestPreAggregateDrop.scala |   2 +-
 .../timeseries/TestTimeSeriesCreateTable.scala  |   2 +-
 .../timeseries/TestTimeSeriesDropSuite.scala|   2 +-
 .../TestTimeseriesTableSelection.scala  |   2 +-
 .../TestDataLoadWithColumnsMoreThanSchema.scala |   3 +-
 .../dataload/TestGlobalSortDataLoad.scala   |   2 +-
 .../TestLoadDataWithDiffTimestampFormat.scala   |   2 +-
 .../TestLoadDataWithFileHeaderException.scala   |  11 +-
 ...ataWithMalformedCarbonCommandException.scala |   3 +-
 .../testsuite/dataload/TestLoadOptions.scala|   2 +-
 .../dataload/TestTableLevelBlockSize.scala  |   4 +-
 .../testsuite/datamap/TestDataMapCommand.scala  |   2 +-
 .../dataretention/DataRetentionTestCase.scala   |   2 +-
 .../spark/testsuite/datetype/DateTypeTest.scala |   2 +-
 .../testsuite/sortcolumns/TestSortColumns.scala |   3 +-
 integration/spark-common/pom.xml|   5 -
 .../exception/ConcurrentOperationException.java |  38 --
 .../MalformedCarbonCommandException.java|  69 ---
 .../MalformedDataMapCommandException.java   |  32 -
 .../spark/exception/NoSuchDataMapException.java |  33 -
 .../org/apache/carbondata/api/CarbonStore.scala |   3 +-
 .../spark/CarbonColumnValidator.scala   |   8 +-
 .../carbondata/spark/load/ValidateUtil.scala|  72 ---
 .../carbondata/spark/rdd/CarbonMergerRDD.scala  |   6 +-
 .../carbondata/spark/util/CommonUtil.scala  |  70 +--
 .../carbondata/spark/util/DataLoadingUtil.scala | 610 ---
 .../spark/util/GlobalDictionaryUtil.scala   |   2 +-
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|   4 +-
 .../spark/rdd/CarbonTableCompactor.scala|   2 +-
 .../org/apache/spark/sql/CarbonSource.scala |   2 +-
 .../datamap/CarbonCreateDataMapCommand.scala|   2 +-
 .../datamap/CarbonDropDataMapCommand.scala  |   2 +-
 .../CarbonAlterTableCompactionCommand.scala |  13 +-
 .../management/CarbonLoadDataCommand.scala  |  17 +-
 .../CarbonProjectForDeleteCommand.scala |   2 +-
 .../CarbonProjectForUpdateCommand.scala |   2 +-
 .../command/mutation/IUDCommonUtil.scala|   2 +-
 .../CreatePreAggregateTableCommand.scala|   7 +-
 .../preaaggregate/PreAggregateUtil.scala|   2 +-
 .../schema/CarbonAlterTableRenameCommand.scala  |   3 +-
 .../command/timeseries/TimeSeriesUtil.scala |   2 +-
 .../datasources/CarbonFileFormat.scala  |  14 +-
 .../sql/execution/strategy/DDLStrategy.scala|   2 +-
 .../strategy/StreamingTableStrategy.scala   |   2 +-
 .../execution/command/CarbonHiveCommands.scala  |   2 +-
 .../sql/parser/CarbonSpark2SqlParser.scala  |   2 +-
 .../spark/sql/parser/CarbonSparkSqlParser.scala |   2 +-
 .../org/apache/spark/util/AlterTableUtil.scala  |   2 +-
 .../org/apache/spark/util/TableAPIUtil.scala|   2 +-
 .../spark/sql/hive/CarbonSessionState.scala |   7 +-
 .../segmentreading/TestSegmentReading.scala |   2 +-
 .../spark/util/AllDictionaryTestCase.scala  |   4 +-
 .../util/ExternalColumnDictionaryTestCase.scala |   6 +-
 .../TestStreamingTableOperation.scala   |   4 +-
 .../bucketing/TableBucketingTestCase.scala  |   2 +-
 .../vectorreader/AddColumnTestCases.scala   |   2 +-
 .../loading/model/CarbonLoadModel.java  |  14 +-
 .../loading/model/CarbonLoadModelBuilder.java   | 322 ++
 .../processing/loading/model/LoadOption.java| 

[40/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row"

2018-03-04 Thread jackylk
Revert "[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort 
temp row"

This reverts commit de92ea9a123b17d903f2d1d4662299315c792954.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/46031a32
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/46031a32
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/46031a32

Branch: refs/heads/carbonstore-rebase5
Commit: 46031a320506ceed10b2134710be6c630c6ee533
Parents: 1d85e91
Author: Jacky Li 
Authored: Sat Feb 10 20:11:25 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 ++-
 .../presto/util/CarbonDataStoreCreator.scala|   1 +
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 -
 .../loading/sort/SortStepRowHandler.java| 466 ---
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 +++--
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 ++-
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 +++
 .../UnsafeRowComparatorForNormalDims.java   |  59 ---
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 --
 .../merger/UnsafeIntermediateFileMerger.java| 118 -
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 +
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +++-
 .../IntermediateSortTempRowComparator.java  |  73 ---
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 +++
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +++-
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 +--
 .../sort/sortdata/TableFieldStat.java   | 176 ---
 28 files changed, 1294 insertions(+), 1186 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/46031a32/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index fca1244..d6ecfbc 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,26 +82,18 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required dictionary Dimension from obj []
+   * Method to get the required Dimension from obj []
*
* @param index
* @param row
* @return
*/
-  public static int getDictDimension(int index, Object[] row) {
-int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
+  public static Integer getDimension(int index, Object[] row) {
+
+Integer[] dimensions = (Integer[]) 
row[WriteStepRowUtil.DICTIONARY_DIMENSION];
+
 return dimensions[index];
-  }
 
-  /**
-   * Method to get the required non-dictionary & complex from 3-parted row
-   * @param index
-   * @param row
-   * @return
-   */
-  public static byte[] getNoDictOrComplex(int index, Object[] row) {
-byte[][] nonDictArray = (byte[][]) 
row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-return nonDictArray[index];
   }
 
   /**
@@ -116,11 +108,60 @@ public class NonDictionaryUtil {
 return measures[index];
   }
 
+  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
+
+return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+  }
+
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] 
byteBufferArr,
   Object[] measureArray) {
+
 out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
 out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
 out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
+
+  /**
+   * This method will extract the single dimension from the complete high card 
dims byte[].+ *
+   * The format of the byte [] will be,  Totallength,CompleteStartOffsets,Dat
+   *
+   * @param highCardArr
+   * @param index
+   * @param highCardinalityCount
+   * @param outBuffer
+   */
+  public static void extractSingleHighCardDims(byte[] 

[27/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/586ab702/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
index 11b3d43..527452a 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/holder/UnsafeSortTempFileChunkHolder.java
@@ -31,15 +31,14 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.core.util.DataTypeUtil;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage;
+import org.apache.carbondata.processing.loading.row.IntermediateSortTempRow;
+import org.apache.carbondata.processing.loading.sort.SortStepRowHandler;
 import 
org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException;
-import org.apache.carbondata.processing.sort.sortdata.NewRowComparator;
+import 
org.apache.carbondata.processing.sort.sortdata.IntermediateSortTempRowComparator;
 import org.apache.carbondata.processing.sort.sortdata.SortParameters;
+import org.apache.carbondata.processing.sort.sortdata.TableFieldStat;
 
 public class UnsafeSortTempFileChunkHolder implements SortTempChunkHolder {
 
@@ -63,21 +62,15 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
* entry count
*/
   private int entryCount;
-
   /**
* return row
*/
-  private Object[] returnRow;
-  private int dimCnt;
-  private int complexCnt;
-  private int measureCnt;
-  private boolean[] isNoDictionaryDimensionColumn;
-  private DataType[] measureDataTypes;
+  private IntermediateSortTempRow returnRow;
   private int readBufferSize;
   private String compressorName;
-  private Object[][] currentBuffer;
+  private IntermediateSortTempRow[] currentBuffer;
 
-  private Object[][] backupBuffer;
+  private IntermediateSortTempRow[] backupBuffer;
 
   private boolean isBackupFilled;
 
@@ -100,27 +93,21 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
 
   private int numberOfObjectRead;
 
-  private int nullSetWordsLength;
-
-  private Comparator comparator;
-
+  private TableFieldStat tableFieldStat;
+  private SortStepRowHandler sortStepRowHandler;
+  private Comparator comparator;
   /**
* Constructor to initialize
*/
   public UnsafeSortTempFileChunkHolder(File tempFile, SortParameters 
parameters) {
 // set temp file
 this.tempFile = tempFile;
-this.dimCnt = parameters.getDimColCount();
-this.complexCnt = parameters.getComplexDimColCount();
-this.measureCnt = parameters.getMeasureColCount();
-this.isNoDictionaryDimensionColumn = 
parameters.getNoDictionaryDimnesionColumn();
-this.measureDataTypes = parameters.getMeasureDataType();
 this.readBufferSize = parameters.getBufferSize();
 this.compressorName = parameters.getSortTempCompressorName();
-
+this.tableFieldStat = new TableFieldStat(parameters);
+this.sortStepRowHandler = new SortStepRowHandler(tableFieldStat);
 this.executorService = Executors.newFixedThreadPool(1);
-this.nullSetWordsLength = ((parameters.getMeasureColCount() - 1) >> 6) + 1;
-comparator = new NewRowComparator(parameters.getNoDictionarySortColumn());
+comparator = new 
IntermediateSortTempRowComparator(parameters.getNoDictionarySortColumn());
 initialize();
   }
 
@@ -169,11 +156,17 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
*
* @throws CarbonSortKeyAndGroupByException problem while reading
*/
+  @Override
   public void readRow() throws CarbonSortKeyAndGroupByException {
 if (prefetch) {
   fillDataForPrefetch();
 } else {
-  this.returnRow = getRowFromStream();
+  try {
+this.returnRow = 
sortStepRowHandler.readIntermediateSortTempRowFromInputStream(stream);
+this.numberOfObjectRead++;
+  } catch (IOException e) {
+throw new CarbonSortKeyAndGroupByException("Problems while reading 
row", e);
+  }
 }
   }
 
@@ -207,63 +200,22 @@ public class UnsafeSortTempFileChunkHolder implements 
SortTempChunkHolder {
   }
 
   /**

[35/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-03-04 Thread jackylk
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d85e916
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d85e916
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d85e916

Branch: refs/heads/carbonstore-rebase5
Commit: 1d85e916f6a0f070960555fb18ee4cd8acbfa315
Parents: 6216294
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index c0cebe0..a7bfdba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb = new StringBuilder("TableBlockInfo{");
- 

[04/50] [abbrv] carbondata git commit: [CARBONDATA-1992] Remove partitionId in CarbonTablePath

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/5663e916/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
index f030d52..369c1f2 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterBatchProcessorStepImpl.java
@@ -24,6 +24,7 @@ import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.datastore.row.CarbonRow;
 import org.apache.carbondata.core.metadata.CarbonTableIdentifier;
 import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.processing.loading.AbstractDataLoadProcessorStep;
 import org.apache.carbondata.processing.loading.CarbonDataLoadConfiguration;
 import org.apache.carbondata.processing.loading.DataField;
@@ -59,13 +60,11 @@ public class DataWriterBatchProcessorStepImpl extends 
AbstractDataLoadProcessorS
 child.initialize();
   }
 
-  private String[] getStoreLocation(CarbonTableIdentifier tableIdentifier, 
String partitionId) {
-String[] storeLocation = CarbonDataProcessorUtil
-.getLocalDataFolderLocation(tableIdentifier.getDatabaseName(),
-tableIdentifier.getTableName(), 
String.valueOf(configuration.getTaskNo()), partitionId,
-configuration.getSegmentId() + "", false, false);
-CarbonDataProcessorUtil.createLocations(storeLocation);
-return storeLocation;
+  private String[] getStoreLocation(CarbonTableIdentifier tableIdentifier) {
+return CarbonDataProcessorUtil.getLocalDataFolderLocation(
+tableIdentifier.getDatabaseName(), tableIdentifier.getTableName(),
+String.valueOf(configuration.getTaskNo()),
+configuration.getSegmentId(), false, false);
   }
 
   @Override public Iterator[] execute() throws 
CarbonDataLoadingException {
@@ -75,18 +74,19 @@ public class DataWriterBatchProcessorStepImpl extends 
AbstractDataLoadProcessorS
 String tableName = tableIdentifier.getTableName();
 try {
   CarbonTimeStatisticsFactory.getLoadStatisticsInstance()
-  
.recordDictionaryValue2MdkAdd2FileTime(configuration.getPartitionId(),
+  
.recordDictionaryValue2MdkAdd2FileTime(CarbonTablePath.DEPRECATED_PATITION_ID,
   System.currentTimeMillis());
   int i = 0;
+  String[] storeLocation = getStoreLocation(tableIdentifier);
+  CarbonDataProcessorUtil.createLocations(storeLocation);
   for (Iterator iterator : iterators) {
-String[] storeLocation = getStoreLocation(tableIdentifier, 
String.valueOf(i));
 int k = 0;
 while (iterator.hasNext()) {
   CarbonRowBatch next = iterator.next();
   // If no rows from merge sorter, then don't create a file in fact 
column handler
   if (next.hasNext()) {
 CarbonFactDataHandlerModel model = CarbonFactDataHandlerModel
-.createCarbonFactDataHandlerModel(configuration, 
storeLocation, i, k++);
+.createCarbonFactDataHandlerModel(configuration, 
storeLocation, 0, k++);
 CarbonFactHandler dataHandler = CarbonFactHandlerFactory
 .createCarbonFactHandler(model, 
CarbonFactHandlerFactory.FactHandlerType.COLUMNAR);
 dataHandler.initialise();
@@ -119,10 +119,11 @@ public class DataWriterBatchProcessorStepImpl extends 
AbstractDataLoadProcessorS
 
CarbonTimeStatisticsFactory.getLoadStatisticsInstance().recordTotalRecords(rowCounter.get());
 processingComplete(dataHandler);
 CarbonTimeStatisticsFactory.getLoadStatisticsInstance()
-.recordDictionaryValue2MdkAdd2FileTime(configuration.getPartitionId(),
+
.recordDictionaryValue2MdkAdd2FileTime(CarbonTablePath.DEPRECATED_PATITION_ID,
 System.currentTimeMillis());
 CarbonTimeStatisticsFactory.getLoadStatisticsInstance()
-.recordMdkGenerateTotalTime(configuration.getPartitionId(), 
System.currentTimeMillis());
+.recordMdkGenerateTotalTime(CarbonTablePath.DEPRECATED_PATITION_ID,
+System.currentTimeMillis());
   }
 
   private void processingComplete(CarbonFactHandler dataHandler) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/5663e916/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterProcessorStepImpl.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/steps/DataWriterProcessorStepImpl.java
 

[45/50] [abbrv] carbondata git commit: [CARBONDATA-2091][DataLoad] Support specifying sort column bounds in data loading

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/faad967d/processing/src/main/java/org/apache/carbondata/processing/loading/sort/impl/UnsafeParallelReadMergeSorterWithBucketingImpl.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/impl/UnsafeParallelReadMergeSorterWithBucketingImpl.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/impl/UnsafeParallelReadMergeSorterWithBucketingImpl.java
deleted file mode 100644
index f605b22..000
--- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/impl/UnsafeParallelReadMergeSorterWithBucketingImpl.java
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.processing.loading.sort.impl;
-
-import java.io.File;
-import java.util.Iterator;
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.carbondata.common.CarbonIterator;
-import org.apache.carbondata.common.logging.LogService;
-import org.apache.carbondata.common.logging.LogServiceFactory;
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import org.apache.carbondata.core.datastore.row.CarbonRow;
-import org.apache.carbondata.core.memory.MemoryException;
-import org.apache.carbondata.core.metadata.schema.BucketingInfo;
-import org.apache.carbondata.core.util.CarbonProperties;
-import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory;
-import org.apache.carbondata.processing.loading.DataField;
-import 
org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException;
-import org.apache.carbondata.processing.loading.row.CarbonRowBatch;
-import org.apache.carbondata.processing.loading.sort.AbstractMergeSorter;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage;
-import org.apache.carbondata.processing.loading.sort.unsafe.UnsafeSortDataRows;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.merger.UnsafeIntermediateMerger;
-import 
org.apache.carbondata.processing.loading.sort.unsafe.merger.UnsafeSingleThreadFinalSortFilesMerger;
-import org.apache.carbondata.processing.sort.sortdata.SortParameters;
-import org.apache.carbondata.processing.util.CarbonDataProcessorUtil;
-
-/**
- * It parallely reads data from array of iterates and do merge sort.
- * First it sorts the data and write to temp files. These temp files will be 
merge sorted to get
- * final merge sort result.
- * This step is specifically for bucketing, it sorts each bucket data 
separately and write to
- * temp files.
- */
-public class UnsafeParallelReadMergeSorterWithBucketingImpl extends 
AbstractMergeSorter {
-
-  private static final LogService LOGGER =
-  LogServiceFactory.getLogService(
-
UnsafeParallelReadMergeSorterWithBucketingImpl.class.getName());
-
-  private SortParameters sortParameters;
-
-  private BucketingInfo bucketingInfo;
-
-  public UnsafeParallelReadMergeSorterWithBucketingImpl(DataField[] 
inputDataFields,
-  BucketingInfo bucketingInfo) {
-this.bucketingInfo = bucketingInfo;
-  }
-
-  @Override public void initialize(SortParameters sortParameters) {
-this.sortParameters = sortParameters;
-  }
-
-  @Override public Iterator[] sort(Iterator[] 
iterators)
-  throws CarbonDataLoadingException {
-UnsafeSortDataRows[] sortDataRows = new 
UnsafeSortDataRows[bucketingInfo.getNumberOfBuckets()];
-UnsafeIntermediateMerger[] intermediateFileMergers =
-new UnsafeIntermediateMerger[sortDataRows.length];
-int inMemoryChunkSizeInMB = 
CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
-inMemoryChunkSizeInMB = inMemoryChunkSizeInMB / 
bucketingInfo.getNumberOfBuckets();
-if (inMemoryChunkSizeInMB < 5) {
-  inMemoryChunkSizeInMB = 5;
-}
-try {
-  for (int i = 0; i < bucketingInfo.getNumberOfBuckets(); i++) {
-SortParameters parameters = sortParameters.getCopy();
-parameters.setPartitionID(i + "");
-setTempLocation(parameters);
-

[16/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/55c4e438/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
deleted file mode 100644
index 6629d31..000
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.carbondata.core.datastore.chunk.impl;
-
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import 
org.apache.carbondata.core.datastore.chunk.store.DimensionChunkStoreFactory;
-import 
org.apache.carbondata.core.datastore.chunk.store.DimensionChunkStoreFactory.DimensionStoreType;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
-import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo;
-import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
-import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
-
-/**
- * This class is gives access to fixed length dimension data chunk store
- */
-public class FixedLengthDimensionDataChunk extends AbstractDimensionDataChunk {
-
-  /**
-   * Constructor
-   *
-   * @param dataChunkdata chunk
-   * @param invertedIndexinverted index
-   * @param invertedIndexReverse reverse inverted index
-   * @param numberOfRows number of rows
-   * @param columnValueSize  size of each column value
-   */
-  public FixedLengthDimensionDataChunk(byte[] dataChunk, int[] invertedIndex,
-  int[] invertedIndexReverse, int numberOfRows, int columnValueSize) {
-long totalSize = null != invertedIndex ?
-dataChunk.length + (2 * numberOfRows * 
CarbonCommonConstants.INT_SIZE_IN_BYTE) :
-dataChunk.length;
-dataChunkStore = DimensionChunkStoreFactory.INSTANCE
-.getDimensionChunkStore(columnValueSize, null != invertedIndex, 
numberOfRows, totalSize,
-DimensionStoreType.FIXEDLENGTH);
-dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunk);
-  }
-
-  /**
-   * Below method will be used to fill the data based on offset and row id
-   *
-   * @param data data to filed
-   * @param offset   offset from which data need to be filed
-   * @param indexrow id of the chunk
-   * @param keyStructureInfo define the structure of the key
-   * @return how many bytes was copied
-   */
-  @Override public int fillChunkData(byte[] data, int offset, int index,
-  KeyStructureInfo keyStructureInfo) {
-dataChunkStore.fillRow(index, data, offset);
-return dataChunkStore.getColumnValueSize();
-  }
-
-  /**
-   * Converts to column dictionary integer value
-   *
-   * @param rowId
-   * @param columnIndex
-   * @param row
-   * @param restructuringInfo
-   * @return
-   */
-  @Override public int fillConvertedChunkData(int rowId, int columnIndex, 
int[] row,
-  KeyStructureInfo restructuringInfo) {
-row[columnIndex] = dataChunkStore.getSurrogate(rowId);
-return columnIndex + 1;
-  }
-
-  /**
-   * Fill the data to vector
-   *
-   * @param vectorInfo
-   * @param column
-   * @param restructuringInfo
-   * @return next column index
-   */
-  @Override public int fillConvertedChunkData(ColumnVectorInfo[] vectorInfo, 
int column,
-  KeyStructureInfo restructuringInfo) {
-ColumnVectorInfo columnVectorInfo = vectorInfo[column];
-int offset = columnVectorInfo.offset;
-int vectorOffset = columnVectorInfo.vectorOffset;
-int len = columnVectorInfo.size + offset;
-CarbonColumnVector vector = columnVectorInfo.vector;
-for (int j = offset; j < len; j++) {
-  int dict = dataChunkStore.getSurrogate(j);
-  if (columnVectorInfo.directDictionaryGenerator == null) {
-vector.putInt(vectorOffset++, dict);
-  } else {
-Object 

[33/50] [abbrv] carbondata git commit: [REBASE] resolve conflict after rebasing to master

2018-03-04 Thread jackylk
[REBASE] resolve conflict after rebasing to master


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/880bbceb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/880bbceb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/880bbceb

Branch: refs/heads/carbonstore-rebase5
Commit: 880bbcebf967d71f6932793114134acacfd26b3f
Parents: 111bb5c
Author: Jacky Li 
Authored: Tue Feb 27 08:51:25 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:04:48 2018 +0800

--
 .../core/datamap/dev/AbstractDataMapWriter.java |  5 ++--
 .../core/datamap/dev/DataMapFactory.java|  2 +-
 .../blockletindex/BlockletDataMapFactory.java   |  2 +-
 .../SegmentUpdateStatusManager.java |  9 +-
 .../datamap/examples/MinMaxDataMapFactory.java  |  5 ++--
 .../datamap/examples/MinMaxDataWriter.java  |  7 +++--
 .../testsuite/datamap/CGDataMapTestCase.scala   | 26 
 .../testsuite/datamap/DataMapWriterSuite.scala  | 19 ++--
 .../testsuite/datamap/FGDataMapTestCase.scala   | 31 +---
 .../iud/DeleteCarbonTableTestCase.scala |  2 +-
 .../TestInsertAndOtherCommandConcurrent.scala   | 14 +
 .../StandardPartitionTableCleanTestCase.scala   | 12 
 .../carbondata/spark/util/DataLoadingUtil.scala |  2 +-
 .../datamap/DataMapWriterListener.java  |  2 +-
 .../processing/merger/CarbonDataMergerUtil.java |  8 +
 .../merger/CompactionResultSortProcessor.java   |  4 +--
 .../merger/RowResultMergerProcessor.java|  5 ++--
 .../partition/spliter/RowResultProcessor.java   |  5 ++--
 .../util/CarbonDataProcessorUtil.java   |  4 +--
 .../processing/util/CarbonLoaderUtil.java   |  9 --
 20 files changed, 73 insertions(+), 100 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/880bbceb/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
index bcc9bad..de6dcb1 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java
@@ -18,6 +18,7 @@ package org.apache.carbondata.core.datamap.dev;
 
 import java.io.IOException;
 
+import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
@@ -35,10 +36,10 @@ public abstract class AbstractDataMapWriter {
 
   protected String writeDirectoryPath;
 
-  public AbstractDataMapWriter(AbsoluteTableIdentifier identifier, String 
segmentId,
+  public AbstractDataMapWriter(AbsoluteTableIdentifier identifier, Segment 
segment,
   String writeDirectoryPath) {
 this.identifier = identifier;
-this.segmentId = segmentId;
+this.segmentId = segment.getSegmentNo();
 this.writeDirectoryPath = writeDirectoryPath;
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/880bbceb/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
index df5670d..50ac279 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
@@ -39,7 +39,7 @@ public interface DataMapFactory {
   /**
* Return a new write for this datamap
*/
-  AbstractDataMapWriter createWriter(Segment segment);
+  AbstractDataMapWriter createWriter(Segment segment, String 
writeDirectoryPath);
 
   /**
* Get the datamap for segmentid

http://git-wip-us.apache.org/repos/asf/carbondata/blob/880bbceb/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
index efe2b71..ee849bd 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
+++ 

[43/50] [abbrv] carbondata git commit: [CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

2018-03-04 Thread jackylk
[CARBONDATA-2018][DataLoad] Optimization in reading/writing for sort temp row

Pick up the no-sort fields in the row and pack them as bytes array and skip 
parsing them during merge sort to reduce CPU consumption

This closes #1792


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/daecc774
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/daecc774
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/daecc774

Branch: refs/heads/carbonstore-rebase5
Commit: daecc774b4bd0a55811a700df104f4d75e7664dd
Parents: 83df87d
Author: xuchuanyin 
Authored: Thu Feb 8 14:35:14 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:08:30 2018 +0800

--
 .../carbondata/core/util/NonDictionaryUtil.java |  67 +--
 .../presto/util/CarbonDataStoreCreator.scala|   1 -
 .../load/DataLoadProcessorStepOnSpark.scala |   6 +-
 .../loading/row/IntermediateSortTempRow.java| 117 +
 .../loading/sort/SortStepRowHandler.java| 466 +++
 .../loading/sort/SortStepRowUtil.java   | 103 
 .../sort/unsafe/UnsafeCarbonRowPage.java| 331 ++---
 .../loading/sort/unsafe/UnsafeSortDataRows.java |  57 +--
 .../unsafe/comparator/UnsafeRowComparator.java  |  95 ++--
 .../UnsafeRowComparatorForNormalDIms.java   |  59 ---
 .../UnsafeRowComparatorForNormalDims.java   |  59 +++
 .../sort/unsafe/holder/SortTempChunkHolder.java |   3 +-
 .../holder/UnsafeFinalMergePageHolder.java  |  19 +-
 .../unsafe/holder/UnsafeInmemoryHolder.java |  21 +-
 .../holder/UnsafeSortTempFileChunkHolder.java   | 138 ++
 .../merger/UnsafeIntermediateFileMerger.java| 118 +
 .../UnsafeSingleThreadFinalSortFilesMerger.java |  27 +-
 .../merger/CompactionResultSortProcessor.java   |   1 -
 .../sort/sortdata/IntermediateFileMerger.java   |  95 +---
 .../IntermediateSortTempRowComparator.java  |  73 +++
 .../sort/sortdata/NewRowComparator.java |   5 +-
 .../sortdata/NewRowComparatorForNormalDims.java |   3 +-
 .../processing/sort/sortdata/RowComparator.java |  94 
 .../sortdata/RowComparatorForNormalDims.java|  62 ---
 .../SingleThreadFinalSortFilesMerger.java   |  25 +-
 .../processing/sort/sortdata/SortDataRows.java  |  85 +---
 .../sort/sortdata/SortTempFileChunkHolder.java  | 174 ++-
 .../sort/sortdata/TableFieldStat.java   | 176 +++
 28 files changed, 1186 insertions(+), 1294 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/daecc774/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
index d6ecfbc..fca1244 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java
@@ -82,18 +82,26 @@ public class NonDictionaryUtil {
   }
 
   /**
-   * Method to get the required Dimension from obj []
+   * Method to get the required dictionary Dimension from obj []
*
* @param index
* @param row
* @return
*/
-  public static Integer getDimension(int index, Object[] row) {
-
-Integer[] dimensions = (Integer[]) 
row[WriteStepRowUtil.DICTIONARY_DIMENSION];
-
+  public static int getDictDimension(int index, Object[] row) {
+int[] dimensions = (int[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION];
 return dimensions[index];
+  }
 
+  /**
+   * Method to get the required non-dictionary & complex from 3-parted row
+   * @param index
+   * @param row
+   * @return
+   */
+  public static byte[] getNoDictOrComplex(int index, Object[] row) {
+byte[][] nonDictArray = (byte[][]) 
row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
+return nonDictArray[index];
   }
 
   /**
@@ -108,60 +116,11 @@ public class NonDictionaryUtil {
 return measures[index];
   }
 
-  public static byte[] getByteArrayForNoDictionaryCols(Object[] row) {
-
-return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX];
-  }
-
   public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] 
byteBufferArr,
   Object[] measureArray) {
-
 out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray;
 out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr;
 out[WriteStepRowUtil.MEASURE] = measureArray;
 
   }
-
-  /**
-   * This method will extract the single dimension from the complete high card 
dims byte[].+ *
-   * The format of the byte [] will be,  Totallength,CompleteStartOffsets,Dat
-   *
-   * @param highCardArr
-   * @param index
-   * @param highCardinalityCount
-   

[38/50] [abbrv] carbondata git commit: [CARBONDATA-2156] Add interface annotation

2018-03-04 Thread jackylk
[CARBONDATA-2156] Add interface annotation

InterfaceAudience and InterfaceStability annotation should be added for user 
and developer

1.InetfaceAudience can be User and Developer
2.InterfaceStability can be Stable, Evolving, Unstable

This closes #1968


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/68c16bb5
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/68c16bb5
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/68c16bb5

Branch: refs/heads/carbonstore-rebase5
Commit: 68c16bb5e26001abdae6d521742e0dfa1fc808d9
Parents: bfdf3e3
Author: Jacky Li 
Authored: Sun Feb 11 10:12:10 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:04:49 2018 +0800

--
 .../common/annotations/InterfaceAudience.java   | 58 
 .../common/annotations/InterfaceStability.java  | 69 
 2 files changed, 127 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/68c16bb5/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
--
diff --git 
a/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
 
b/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
new file mode 100644
index 000..fa9729d
--- /dev/null
+++ 
b/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.common.annotations;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+/**
+ * This annotation is ported and modified from Apache Hadoop project.
+ *
+ * Annotation to inform users of a package, class or method's intended 
audience.
+ * Currently the audience can be {@link User}, {@link Developer}
+ *
+ * Public classes that are not marked with this annotation must be
+ * considered by default as {@link Developer}.
+ *
+ * External applications must only use classes that are marked {@link User}.
+ *
+ * Methods may have a different annotation that it is more restrictive
+ * compared to the audience classification of the class. Example: A class
+ * might be {@link User}, but a method may be {@link Developer}
+ */
+@InterfaceAudience.User
+@InterfaceStability.Evolving
+public class InterfaceAudience {
+  /**
+   * Intended for use by any project or application.
+   */
+  @Documented
+  @Retention(RetentionPolicy.RUNTIME)
+  public @interface User { }
+
+  /**
+   * Intended only for developers to extend interface for CarbonData project
+   * For example, new Datamap implementations.
+   */
+  @Documented
+  @Retention(RetentionPolicy.RUNTIME)
+  public @interface Developer { }
+
+  private InterfaceAudience() { } // Audience can't exist on its own
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/68c16bb5/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceStability.java
--
diff --git 
a/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceStability.java
 
b/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceStability.java
new file mode 100644
index 000..b8e5e52
--- /dev/null
+++ 
b/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceStability.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by 

[48/50] [abbrv] carbondata git commit: [CARBONDATA-2186] Add InterfaceAudience.Internal to annotate internal interface

2018-03-04 Thread jackylk
[CARBONDATA-2186] Add InterfaceAudience.Internal to annotate internal interface

This closes #1986


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8996cd4a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8996cd4a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8996cd4a

Branch: refs/heads/carbonstore-rebase5
Commit: 8996cd4a412ddb4bcded85a7320b106d20692c52
Parents: d32c0cf
Author: Jacky Li 
Authored: Tue Feb 20 11:16:53 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:08:30 2018 +0800

--
 .../java/org/apache/carbondata/common/Maps.java  |  2 +-
 .../org/apache/carbondata/common/Strings.java|  2 +-
 .../common/annotations/InterfaceAudience.java| 19 ++-
 .../common/annotations/InterfaceStability.java   |  2 +-
 .../loading/model/CarbonLoadModelBuilder.java|  2 +-
 .../processing/loading/model/LoadOption.java |  2 +-
 .../carbondata/sdk/file/CSVCarbonWriter.java |  4 +---
 7 files changed, 20 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8996cd4a/common/src/main/java/org/apache/carbondata/common/Maps.java
--
diff --git a/common/src/main/java/org/apache/carbondata/common/Maps.java 
b/common/src/main/java/org/apache/carbondata/common/Maps.java
index 14fc329..4e76192 100644
--- a/common/src/main/java/org/apache/carbondata/common/Maps.java
+++ b/common/src/main/java/org/apache/carbondata/common/Maps.java
@@ -21,7 +21,7 @@ import java.util.Map;
 
 import org.apache.carbondata.common.annotations.InterfaceAudience;
 
-@InterfaceAudience.Developer
+@InterfaceAudience.Internal
 public class Maps {
 
   /**

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8996cd4a/common/src/main/java/org/apache/carbondata/common/Strings.java
--
diff --git a/common/src/main/java/org/apache/carbondata/common/Strings.java 
b/common/src/main/java/org/apache/carbondata/common/Strings.java
index 08fdc3c..23c7f9f 100644
--- a/common/src/main/java/org/apache/carbondata/common/Strings.java
+++ b/common/src/main/java/org/apache/carbondata/common/Strings.java
@@ -21,7 +21,7 @@ import java.util.Objects;
 
 import org.apache.carbondata.common.annotations.InterfaceAudience;
 
-@InterfaceAudience.Developer
+@InterfaceAudience.Internal
 public class Strings {
 
   /**

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8996cd4a/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
--
diff --git 
a/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
 
b/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
index fa9729d..8d214ff 100644
--- 
a/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
+++ 
b/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceAudience.java
@@ -25,10 +25,10 @@ import java.lang.annotation.RetentionPolicy;
  * This annotation is ported and modified from Apache Hadoop project.
  *
  * Annotation to inform users of a package, class or method's intended 
audience.
- * Currently the audience can be {@link User}, {@link Developer}
+ * Currently the audience can be {@link User}, {@link Developer}, {@link 
Internal}
  *
  * Public classes that are not marked with this annotation must be
- * considered by default as {@link Developer}.
+ * considered by default as {@link Internal}.
  *
  * External applications must only use classes that are marked {@link User}.
  *
@@ -47,12 +47,21 @@ public class InterfaceAudience {
   public @interface User { }
 
   /**
-   * Intended only for developers to extend interface for CarbonData project
-   * For example, new Datamap implementations.
+   * Intended for developers to develop extension for Apache CarbonData project
+   * For example, "Index DataMap" to add a new index implementation, etc
*/
   @Documented
   @Retention(RetentionPolicy.RUNTIME)
-  public @interface Developer { }
+  public @interface Developer {
+String[] value();
+  }
+
+  /**
+   * Intended only for internal usage within Apache CarbonData project.
+   */
+  @Documented
+  @Retention(RetentionPolicy.RUNTIME)
+  public @interface Internal { }
 
   private InterfaceAudience() { } // Audience can't exist on its own
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8996cd4a/common/src/main/java/org/apache/carbondata/common/annotations/InterfaceStability.java
--
diff --git 

[19/50] [abbrv] carbondata git commit: [CARBONDATA-2025] Unify all path construction through CarbonTablePath static method

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/9b9125b6/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
--
diff --git 
a/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
 
b/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
index cd1e28a..d30891a 100644
--- 
a/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
+++ 
b/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
@@ -63,30 +63,6 @@ public class BlockIndexStoreTest extends TestCase {
 
   }
 
-//  public void testLoadAndGetTaskIdToSegmentsMapForSingleSegment()
-//  throws IOException {
-//File file = getPartFile();
-//TableBlockInfo info =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//CarbonTableIdentifier carbonTableIdentifier =
-//new 
CarbonTableIdentifier(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "t3", "1");
-//AbsoluteTableIdentifier absoluteTableIdentifier =
-//new AbsoluteTableIdentifier("/src/test/resources", 
carbonTableIdentifier);
-//try {
-//
-//  List tableBlockInfoList =
-//  getTableBlockUniqueIdentifierList(Arrays.asList(new 
TableBlockInfo[] { info }), absoluteTableIdentifier);
-//  List loadAndGetBlocks = 
cache.getAll(tableBlockInfoList);
-//  assertTrue(loadAndGetBlocks.size() == 1);
-//} catch (Exception e) {
-//  assertTrue(false);
-//}
-//List segmentIds = new ArrayList<>();
-//  segmentIds.add(info.getSegment());
-//cache.removeTableBlocks(segmentIds, absoluteTableIdentifier);
-//  }
-//
   private List 
getTableBlockUniqueIdentifierList(List tableBlockInfos,
   AbsoluteTableIdentifier absoluteTableIdentifier) {
 List tableBlockUniqueIdentifiers = new 
ArrayList<>();
@@ -95,138 +71,6 @@ public class BlockIndexStoreTest extends TestCase {
 }
 return tableBlockUniqueIdentifiers;
   }
-//
-//  public void 
testloadAndGetTaskIdToSegmentsMapForSameBlockLoadedConcurrently()
-//  throws IOException {
-//String canonicalPath =
-//new File(this.getClass().getResource("/").getPath() + 
"/../../").getCanonicalPath();
-//File file = getPartFile();
-//TableBlockInfo info =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//TableBlockInfo info1 =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//
-//TableBlockInfo info2 =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "1", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//TableBlockInfo info3 =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "1", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//TableBlockInfo info4 =
-//new TableBlockInfo(file.getAbsolutePath(), 0, "1", new String[] { 
"loclhost" },
-//file.length(), ColumnarFormatVersion.V1, null);
-//
-//CarbonTableIdentifier carbonTableIdentifier =
-//new 
CarbonTableIdentifier(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "t3", "1");
-//AbsoluteTableIdentifier absoluteTableIdentifier =
-//new AbsoluteTableIdentifier("/src/test/resources", 
carbonTableIdentifier);
-//ExecutorService executor = Executors.newFixedThreadPool(3);
-//executor.submit(new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] 
{ info, info1 }),
-//absoluteTableIdentifier));
-//executor.submit(
-//new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] { info2, 
info3, info4 }),
-//absoluteTableIdentifier));
-//executor.submit(new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] 
{ info, info1 }),
-//absoluteTableIdentifier));
-//executor.submit(
-//new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] { info2, 
info3, info4 }),
-//absoluteTableIdentifier));
-//executor.shutdown();
-//try {
-//  executor.awaitTermination(1, TimeUnit.DAYS);
-//} catch (InterruptedException e) {
-//  e.printStackTrace();
-//}
-//List tableBlockInfos =
-//Arrays.asList(new TableBlockInfo[] { info, info1, info2, info3, 
info4 });
-//try {
-//  List tableBlockUniqueIdentifiers =
-//  getTableBlockUniqueIdentifierList(tableBlockInfos, 
absoluteTableIdentifier);
-//  List loadAndGetBlocks = 
cache.getAll(tableBlockUniqueIdentifiers);
-//  assertTrue(loadAndGetBlocks.size() == 5);
-//} catch (Exception e) {
-//  assertTrue(false);
-//}
-//List 

[34/50] [abbrv] carbondata git commit: [CARBONDATA-1997] Add CarbonWriter SDK API

2018-03-04 Thread jackylk
[CARBONDATA-1997] Add CarbonWriter SDK API

Added a new module called store-sdk, and added a CarbonWriter API, it can be 
used to write Carbondata files to a specified folder, without Spark and Hadoop 
dependency. User can use this API in any environment.

This closes #1967


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/a9508633
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/a9508633
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/a9508633

Branch: refs/heads/carbonstore-rebase5
Commit: a95086333e665752347b67812a663eac5e8ca8c7
Parents: 68c16bb
Author: Jacky Li 
Authored: Sat Feb 10 19:44:23 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:04:49 2018 +0800

--
 .../org/apache/carbondata/common/Strings.java   |  40 
 .../apache/carbondata/common/StringsSuite.java  |  53 +
 .../core/metadata/schema/table/CarbonTable.java |   7 +
 .../schema/table/CarbonTableBuilder.java|  72 +++
 .../core/metadata/schema/table/TableSchema.java |   7 +
 .../schema/table/TableSchemaBuilder.java| 107 ++
 .../schema/table/CarbonTableBuilderSuite.java   |  86 
 .../metadata/schema/table/CarbonTableTest.java  |  12 +-
 .../schema/table/TableSchemaBuilderSuite.java   |  56 ++
 .../carbondata/spark/util/DataLoadingUtil.scala |  45 +
 pom.xml |   7 +
 store/sdk/pom.xml   | 130 +
 .../carbondata/sdk/file/CSVCarbonWriter.java|  89 +
 .../carbondata/sdk/file/CarbonWriter.java   |  51 +
 .../sdk/file/CarbonWriterBuilder.java   | 194 +++
 .../org/apache/carbondata/sdk/file/Field.java   |  74 +++
 .../org/apache/carbondata/sdk/file/Schema.java  |  74 +++
 .../sdk/file/CSVCarbonWriterSuite.java  | 127 
 18 files changed, 1225 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/a9508633/common/src/main/java/org/apache/carbondata/common/Strings.java
--
diff --git a/common/src/main/java/org/apache/carbondata/common/Strings.java 
b/common/src/main/java/org/apache/carbondata/common/Strings.java
new file mode 100644
index 000..23288dd
--- /dev/null
+++ b/common/src/main/java/org/apache/carbondata/common/Strings.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.common;
+
+import java.util.Objects;
+
+public class Strings {
+
+  /**
+   * Provide same function as mkString in Scala.
+   * This is added to avoid JDK 8 dependency.
+   */
+  public static String mkString(String[] strings, String delimeter) {
+Objects.requireNonNull(strings);
+Objects.requireNonNull(delimeter);
+StringBuilder builder = new StringBuilder();
+for (int i = 0; i < strings.length; i++) {
+  builder.append(strings[i]);
+  if (i != strings.length - 1) {
+builder.append(delimeter);
+  }
+}
+return builder.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/a9508633/common/src/test/java/org/apache/carbondata/common/StringsSuite.java
--
diff --git 
a/common/src/test/java/org/apache/carbondata/common/StringsSuite.java 
b/common/src/test/java/org/apache/carbondata/common/StringsSuite.java
new file mode 100644
index 000..65da32b
--- /dev/null
+++ b/common/src/test/java/org/apache/carbondata/common/StringsSuite.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the 

[39/50] [abbrv] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/83df87dd/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
 
b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
new file mode 100644
index 000..fbb93b6
--- /dev/null
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.processing.loading.model;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.carbondata.common.Maps;
+import org.apache.carbondata.common.Strings;
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import org.apache.carbondata.common.constants.LoggerAction;
+import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonProperties;
+import org.apache.carbondata.core.util.CarbonUtil;
+import 
org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants;
+import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat;
+import org.apache.carbondata.processing.loading.sort.SortScopeOptions;
+import org.apache.carbondata.processing.util.TableOptionConstant;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Builder for {@link CarbonLoadModel}
+ */
+@InterfaceAudience.Developer
+public class CarbonLoadModelBuilder {
+
+  private CarbonTable table;
+
+  public CarbonLoadModelBuilder(CarbonTable table) {
+this.table = table;
+  }
+
+  /**
+   * build CarbonLoadModel for data loading
+   * @param options Load options from user input
+   * @return a new CarbonLoadModel instance
+   */
+  public CarbonLoadModel build(
+  Map options) throws InvalidLoadOptionException, 
IOException {
+Map optionsFinal = 
LoadOption.fillOptionWithDefaultValue(options);
+optionsFinal.put("sort_scope", "no_sort");
+if (!options.containsKey("fileheader")) {
+  List csvHeader = 
table.getCreateOrderColumn(table.getTableName());
+  String[] columns = new String[csvHeader.size()];
+  for (int i = 0; i < columns.length; i++) {
+columns[i] = csvHeader.get(i).getColName();
+  }
+  optionsFinal.put("fileheader", Strings.mkString(columns, ","));
+}
+CarbonLoadModel model = new CarbonLoadModel();
+
+// we have provided 'fileheader', so it hadoopConf can be null
+build(options, optionsFinal, model, null);
+
+// set default values
+
model.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
+model.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
+model.setUseOnePass(Boolean.parseBoolean(Maps.getOrDefault(options, 
"onepass", "false")));
+model.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost", 
null));
+try {
+  
model.setDictionaryServerPort(Integer.parseInt(Maps.getOrDefault(options, 
"dictport", "-1")));
+} catch (NumberFormatException e) {
+  throw new InvalidLoadOptionException(e.getMessage());
+}
+return model;
+  }
+
+  /**
+   * build CarbonLoadModel for data loading
+   * @param options Load options from user input
+   * @param optionsFinal Load options that populated with default values for 
optional options
+   * @param carbonLoadModel The output load model
+   * @param hadoopConf hadoopConf is needed to read CSV header if there 
'fileheader' is not set in
+   *   user provided load options
+   */
+  public void build(
+  Map options,
+  Map optionsFinal,
+  CarbonLoadModel 

[20/50] [abbrv] carbondata git commit: [CARBONDATA-2025] Unify all path construction through CarbonTablePath static method

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f06824e9/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
--
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
index 40b5cfc..753e637 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableRenameCommand.scala
@@ -34,7 +34,7 @@ import 
org.apache.carbondata.core.metadata.CarbonTableIdentifier
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable
 import org.apache.carbondata.core.statusmanager.SegmentStatusManager
 import org.apache.carbondata.core.util.CarbonUtil
-import org.apache.carbondata.core.util.path.CarbonStorePath
+import org.apache.carbondata.core.util.path.CarbonTablePath
 import org.apache.carbondata.events.{AlterTableRenamePostEvent, 
AlterTableRenamePreEvent, OperationContext, OperationListenerBus}
 import org.apache.carbondata.format.SchemaEvolutionEntry
 import org.apache.carbondata.spark.exception.{ConcurrentOperationException, 
MalformedCarbonCommandException}
@@ -97,8 +97,7 @@ private[sql] case class CarbonAlterTableRenameCommand(
   val oldTableIdentifier = carbonTable.getAbsoluteTableIdentifier
   DataMapStoreManager.getInstance().clearDataMaps(oldTableIdentifier)
   // get the latest carbon table and check for column existence
-  val oldTablePath = CarbonStorePath.getCarbonTablePath(oldTableIdentifier)
-  val tableMetadataFile = oldTablePath.getPath
+  val tableMetadataFile = oldTableIdentifier.getTablePath
   val operationContext = new OperationContext
   // TODO: Pass new Table Path in pre-event.
   val alterTableRenamePreEvent: AlterTableRenamePreEvent = 
AlterTableRenamePreEvent(
@@ -108,7 +107,7 @@ private[sql] case class CarbonAlterTableRenameCommand(
 sparkSession)
   OperationListenerBus.getInstance().fireEvent(alterTableRenamePreEvent, 
operationContext)
   val tableInfo: org.apache.carbondata.format.TableInfo =
-metastore.getThriftTableInfo(oldTablePath)(sparkSession)
+metastore.getThriftTableInfo(carbonTable)(sparkSession)
   val schemaEvolutionEntry = new 
SchemaEvolutionEntry(System.currentTimeMillis)
   schemaEvolutionEntry.setTableName(newTableName)
   timeStamp = System.currentTimeMillis()
@@ -117,7 +116,8 @@ private[sql] case class CarbonAlterTableRenameCommand(
   val fileType = FileFactory.getFileType(tableMetadataFile)
   val newTableIdentifier = new CarbonTableIdentifier(oldDatabaseName,
 newTableName, carbonTable.getCarbonTableIdentifier.getTableId)
-  var newTablePath = CarbonUtil.getNewTablePath(oldTablePath, 
newTableIdentifier.getTableName)
+  var newTablePath = CarbonTablePath.getNewTablePath(
+oldTableIdentifier.getTablePath, newTableIdentifier.getTableName)
   metastore.removeTableFromMetadata(oldDatabaseName, oldTableName)
   val hiveClient = 
sparkSession.sessionState.catalog.asInstanceOf[CarbonSessionCatalog]
 .getClient()
@@ -139,9 +139,9 @@ private[sql] case class CarbonAlterTableRenameCommand(
   // changed the rename order to deal with situation when carbon table and 
hive table
   // will point to the same tablePath
   if (FileFactory.isFileExist(tableMetadataFile, fileType)) {
-val rename = FileFactory.getCarbonFile(oldTablePath.getPath, fileType)
-  .renameForce(oldTablePath.getParent.toString + 
CarbonCommonConstants.FILE_SEPARATOR +
-   newTableName)
+val rename = 
FileFactory.getCarbonFile(oldTableIdentifier.getTablePath, fileType)
+  .renameForce(
+CarbonTablePath.getNewTablePath(oldTableIdentifier.getTablePath, 
newTableName))
 if (!rename) {
   renameBadRecords(newTableName, oldTableName, oldDatabaseName)
   sys.error(s"Folder rename failed for table 
$oldDatabaseName.$oldTableName")
@@ -149,7 +149,7 @@ private[sql] case class CarbonAlterTableRenameCommand(
   }
   val updatedParts = updatePartitionLocations(
 partitions,
-oldTablePath.getPath,
+oldTableIdentifier.getTablePath,
 newTablePath,
 sparkSession)
 
@@ -191,13 +191,11 @@ private[sql] case class CarbonAlterTableRenameCommand(
   case e: Exception =>
 LOGGER.error(e, "Rename table failed: " + e.getMessage)
 if (carbonTable != null) {
-  AlterTableUtil
-.revertRenameTableChanges(oldTableIdentifier,
-  newTableName,
-  carbonTable.getTablePath,
-  

[14/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
index 69f5ceb..22d1df1 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
@@ -43,10 +43,9 @@ import 
org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.block.TableBlockInfo;
 import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
 import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
-import 
org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNodeWrapper;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNode;
 import org.apache.carbondata.core.indexstore.blockletindex.IndexWrapper;
 import org.apache.carbondata.core.keygenerator.KeyGenException;
-import org.apache.carbondata.core.keygenerator.KeyGenerator;
 import org.apache.carbondata.core.memory.UnsafeMemoryManager;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
 import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
@@ -64,8 +63,8 @@ import 
org.apache.carbondata.core.scan.executor.util.RestructureUtil;
 import org.apache.carbondata.core.scan.filter.FilterUtil;
 import org.apache.carbondata.core.scan.filter.SingleTableProvider;
 import org.apache.carbondata.core.scan.filter.TableProvider;
-import org.apache.carbondata.core.scan.model.QueryDimension;
-import org.apache.carbondata.core.scan.model.QueryMeasure;
+import org.apache.carbondata.core.scan.model.ProjectionDimension;
+import org.apache.carbondata.core.scan.model.ProjectionMeasure;
 import org.apache.carbondata.core.scan.model.QueryModel;
 import org.apache.carbondata.core.stats.QueryStatistic;
 import org.apache.carbondata.core.stats.QueryStatisticsConstants;
@@ -121,7 +120,6 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 queryProperties.queryStatisticsRecorder =
 
CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId());
 queryModel.setStatisticsRecorder(queryProperties.queryStatisticsRecorder);
-QueryUtil.resolveQueryModel(queryModel);
 QueryStatistic queryStatistic = new QueryStatistic();
 // sort the block info
 // so block will be loaded in sorted order this will be required for
@@ -168,12 +166,12 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, 
System.currentTimeMillis());
 queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic);
 // calculating the total number of aggeragted columns
-int measureCount = queryModel.getQueryMeasures().size();
+int measureCount = queryModel.getProjectionMeasures().size();
 
 int currentIndex = 0;
 DataType[] dataTypes = new DataType[measureCount];
 
-for (QueryMeasure carbonMeasure : queryModel.getQueryMeasures()) {
+for (ProjectionMeasure carbonMeasure : queryModel.getProjectionMeasures()) 
{
   // adding the data type and aggregation type of all the measure this
   // can be used
   // to select the aggregator
@@ -198,9 +196,11 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 queryStatistic = new QueryStatistic();
 // dictionary column unique column id to dictionary mapping
 // which will be used to get column actual data
-queryProperties.columnToDictionayMapping = QueryUtil
-.getDimensionDictionaryDetail(queryModel.getQueryDimension(),
-queryProperties.complexFilterDimension, 
queryModel.getAbsoluteTableIdentifier(),
+queryProperties.columnToDictionayMapping =
+QueryUtil.getDimensionDictionaryDetail(
+queryModel.getProjectionDimensions(),
+queryProperties.complexFilterDimension,
+queryModel.getAbsoluteTableIdentifier(),
 tableProvider);
 queryStatistic
 .addStatistics(QueryStatisticsConstants.LOAD_DICTIONARY, 
System.currentTimeMillis());
@@ -263,8 +263,8 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 // and query will be executed based on that infos
 for (int i = 0; i < queryProperties.dataBlocks.size(); i++) {
   AbstractIndex abstractIndex = queryProperties.dataBlocks.get(i);
-  BlockletDataRefNodeWrapper dataRefNode =
-  (BlockletDataRefNodeWrapper) abstractIndex.getDataRefNode();
+  BlockletDataRefNode dataRefNode =
+  (BlockletDataRefNode) abstractIndex.getDataRefNode();
   

[02/50] [abbrv] carbondata git commit: [CARBONDATA-1827] S3 Carbon Implementation

2018-03-04 Thread jackylk
[CARBONDATA-1827] S3 Carbon Implementation

1.Provide support for s3 in carbondata.
2.Added S3Example to create carbon table on s3.
3.Added S3CSVExample to load carbon table using csv from s3.

This closes #1805


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9086a1b9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9086a1b9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9086a1b9

Branch: refs/heads/carbonstore-rebase5
Commit: 9086a1b9f2cd6cf1d4d42290a4e3678b01472714
Parents: 0c75ab7
Author: SangeetaGulia 
Authored: Thu Sep 21 14:56:26 2017 +0530
Committer: Jacky Li 
Committed: Sun Mar 4 20:30:31 2018 +0800

--
 .../core/constants/CarbonCommonConstants.java   |  21 +++
 .../filesystem/AbstractDFSCarbonFile.java   |  20 ++-
 .../datastore/filesystem/HDFSCarbonFile.java|   5 +-
 .../core/datastore/impl/FileFactory.java|  11 +-
 .../core/locks/CarbonLockFactory.java   |  28 ++--
 .../carbondata/core/locks/S3FileLock.java   | 111 +
 .../carbondata/core/util/CarbonProperties.java  |   3 +-
 .../filesystem/HDFSCarbonFileTest.java  |   8 +-
 examples/spark2/pom.xml |   5 +
 examples/spark2/src/main/resources/data1.csv|  11 ++
 .../carbondata/examples/S3CsvExample.scala  |  99 +++
 .../apache/carbondata/examples/S3Example.scala  | 164 +++
 .../spark/rdd/NewCarbonDataLoadRDD.scala|  42 -
 integration/spark2/pom.xml  |  43 +
 .../spark/rdd/CarbonDataRDDFactory.scala|   3 +-
 .../org/apache/spark/sql/CarbonSession.scala|   3 +
 16 files changed, 554 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/9086a1b9/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index b2a3375..af3ed99 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -167,6 +167,22 @@ public final class CarbonCommonConstants {
   public static final String S3N_PREFIX = "s3n://";
 
   public static final String S3A_PREFIX = "s3a://";
+  /**
+   * Access Key for s3n
+   */
+  public static final String S3N_ACCESS_KEY = "fs.s3n.awsAccessKeyId";
+  /**
+   * Secret Key for s3n
+   */
+  public static final String S3N_SECRET_KEY = "fs.s3n.awsSecretAccessKey";
+  /**
+   * Access Key for s3
+   */
+  public static final String S3_ACCESS_KEY = "fs.s3.awsAccessKeyId";
+  /**
+   * Secret Key for s3
+   */
+  public static final String S3_SECRET_KEY = "fs.s3.awsSecretAccessKey";
 
   /**
* FS_DEFAULT_FS
@@ -937,6 +953,11 @@ public final class CarbonCommonConstants {
   public static final String CARBON_LOCK_TYPE_HDFS = "HDFSLOCK";
 
   /**
+   * S3LOCK TYPE
+   */
+  public static final String CARBON_LOCK_TYPE_S3 = "S3LOCK";
+
+  /**
* Invalid filter member log string
*/
   public static final String FILTER_INVALID_MEMBER =

http://git-wip-us.apache.org/repos/asf/carbondata/blob/9086a1b9/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
index 68eaa21..fd5dc40 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
@@ -51,7 +51,7 @@ import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.io.compress.Lz4Codec;
 import org.apache.hadoop.io.compress.SnappyCodec;
 
-public abstract  class AbstractDFSCarbonFile implements CarbonFile {
+public abstract class AbstractDFSCarbonFile implements CarbonFile {
   /**
* LOGGER
*/
@@ -262,18 +262,28 @@ public abstract  class AbstractDFSCarbonFile implements 
CarbonFile {
   @Override public DataOutputStream getDataOutputStream(String path, 
FileFactory.FileType fileType,
   int bufferSize, boolean append) throws IOException {
 Path pt = new Path(path);
-FileSystem fs = pt.getFileSystem(FileFactory.getConfiguration());
+FileSystem fileSystem = pt.getFileSystem(FileFactory.getConfiguration());
 FSDataOutputStream 

[18/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
[CARBONDATA-2099] Refactor query scan process to improve readability

Unified concepts in scan process flow:

1.QueryModel contains all parameter for scan, it is created by API in 
CarbonTable. (In future, CarbonTable will be the entry point for various table 
operations)
2.Use term ColumnChunk to represent one column in one blocklet, and use 
ChunkIndex in reader to read specified column chunk
3.Use term ColumnPage to represent one page in one ColumnChunk
4.QueryColumn => ProjectionColumn, indicating it is for projection

This closes #1874


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/92c9f224
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/92c9f224
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/92c9f224

Branch: refs/heads/carbonstore-rebase5
Commit: 92c9f224094581378a681fd1f7b0cb02b923687c
Parents: bd40a0d
Author: Jacky Li 
Authored: Tue Jan 30 21:24:04 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:30:32 2018 +0800

--
 .../dictionary/AbstractDictionaryCache.java |   3 +-
 .../cache/dictionary/DictionaryCacheLoader.java |   7 +-
 .../dictionary/DictionaryCacheLoaderImpl.java   |  11 +-
 .../core/datastore/BTreeBuilderInfo.java|   6 -
 .../carbondata/core/datastore/DataRefNode.java  |  81 +--
 .../carbondata/core/datastore/FileHolder.java   | 118 
 .../carbondata/core/datastore/FileReader.java   | 114 +++
 .../core/datastore/block/SegmentProperties.java |  50 +-
 .../chunk/DimensionColumnDataChunk.java | 116 ---
 .../datastore/chunk/DimensionColumnPage.java| 111 +++
 .../chunk/impl/AbstractDimensionColumnPage.java |  89 +++
 .../chunk/impl/AbstractDimensionDataChunk.java  |  95 ---
 .../impl/ColumnGroupDimensionColumnPage.java| 194 ++
 .../impl/ColumnGroupDimensionDataChunk.java | 194 --
 .../chunk/impl/DimensionRawColumnChunk.java |  46 +-
 .../impl/FixedLengthDimensionColumnPage.java| 163 +
 .../impl/FixedLengthDimensionDataChunk.java | 163 -
 .../chunk/impl/MeasureRawColumnChunk.java   |  26 +-
 .../impl/VariableLengthDimensionColumnPage.java | 133 
 .../impl/VariableLengthDimensionDataChunk.java  | 140 
 .../reader/DimensionColumnChunkReader.java  |  14 +-
 .../chunk/reader/MeasureColumnChunkReader.java  |  12 +-
 .../AbstractChunkReaderV2V3Format.java  |  34 +-
 ...mpressedDimensionChunkFileBasedReaderV1.java |  38 +-
 ...mpressedDimensionChunkFileBasedReaderV2.java |  30 +-
 ...essedDimChunkFileBasedPageLevelReaderV3.java |  11 +-
 ...mpressedDimensionChunkFileBasedReaderV3.java |  49 +-
 .../AbstractMeasureChunkReaderV2V3Format.java   |  42 +-
 ...CompressedMeasureChunkFileBasedReaderV1.java |  16 +-
 ...CompressedMeasureChunkFileBasedReaderV2.java |  24 +-
 ...CompressedMeasureChunkFileBasedReaderV3.java |  45 +-
 ...essedMsrChunkFileBasedPageLevelReaderV3.java |   8 +-
 .../chunk/store/ColumnPageWrapper.java  |  30 +-
 .../chunk/store/DimensionDataChunkStore.java|   8 +-
 .../SafeFixedLengthDimensionDataChunkStore.java |   6 +-
 ...feVariableLengthDimensionDataChunkStore.java |   8 +-
 ...nsafeFixedLengthDimensionDataChunkStore.java |  10 +-
 ...afeVariableLengthDimesionDataChunkStore.java |  10 +-
 .../datastore/columnar/ColumnGroupModel.java|  26 -
 .../core/datastore/impl/DFSFileHolderImpl.java  | 166 -
 .../core/datastore/impl/DFSFileReaderImpl.java  | 155 
 .../datastore/impl/DefaultFileTypeProvider.java |  16 +-
 .../core/datastore/impl/FileFactory.java|   4 +-
 .../core/datastore/impl/FileHolderImpl.java | 224 --
 .../core/datastore/impl/FileReaderImpl.java | 215 ++
 .../core/datastore/impl/FileTypeInerface.java   |   4 +-
 .../impl/btree/AbstractBTreeLeafNode.java   |  60 +-
 .../impl/btree/BTreeDataRefNodeFinder.java  |   6 +-
 .../datastore/impl/btree/BTreeNonLeafNode.java  |  52 +-
 .../impl/btree/BlockBTreeLeafNode.java  |   6 +-
 .../impl/btree/BlockletBTreeLeafNode.java   |  46 +-
 .../page/encoding/EncodingFactory.java  |   8 +-
 .../server/NonSecureDictionaryServer.java   |   1 -
 .../core/indexstore/BlockletDetailInfo.java |   4 -
 .../blockletindex/BlockletDataRefNode.java  | 228 ++
 .../BlockletDataRefNodeWrapper.java | 241 ---
 .../indexstore/blockletindex/IndexWrapper.java  |   2 +-
 .../blockletindex/SegmentIndexFileStore.java|   7 +-
 .../core/memory/HeapMemoryAllocator.java|   2 +-
 .../core/metadata/blocklet/SegmentInfo.java |  19 -
 .../core/metadata/schema/table/CarbonTable.java | 130 +++-
 .../schema/table/RelationIdentifier.java|  16 -
 .../core/metadata/schema/table/TableInfo.java   |   6 +-
 .../schema/table/column/CarbonColumn.java   |   2 +-
 .../schema/table/column/CarbonDimension.java|  12 -
 

[31/50] [abbrv] carbondata git commit: [HotFix][CheckStyle] Fix import related checkstyle

2018-03-04 Thread jackylk
[HotFix][CheckStyle] Fix import related checkstyle

This closes #1952


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d88d5bb9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d88d5bb9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d88d5bb9

Branch: refs/heads/carbonstore-rebase5
Commit: d88d5bb940f0fea6e5c8560fc5c8ea3724b95a28
Parents: bb5bb00
Author: xuchuanyin 
Authored: Thu Feb 8 15:39:45 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:12 2018 +0800

--
 .../core/indexstore/blockletindex/BlockletDataRefNode.java | 2 +-
 .../org/apache/carbondata/core/memory/HeapMemoryAllocator.java | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d88d5bb9/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
index b8fd6ff..50862a7 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNode.java
@@ -33,8 +33,8 @@ import 
org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReade
 import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
 import org.apache.carbondata.core.indexstore.FineGrainBlocklet;
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
-import org.apache.carbondata.core.util.BitSetGroup;
 import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
+import org.apache.carbondata.core.util.BitSetGroup;
 
 /**
  * wrapper for blocklet data map data

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d88d5bb9/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java 
b/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java
index 53cbb1d..242995b 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java
@@ -17,11 +17,11 @@
 
 package org.apache.carbondata.core.memory;
 
-import javax.annotation.concurrent.GuardedBy;
 import java.lang.ref.WeakReference;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.Map;
+import javax.annotation.concurrent.GuardedBy;
 
 import org.apache.carbondata.core.util.CarbonProperties;
 



[30/50] [abbrv] carbondata git commit: [CARBONDATA-1480]Min Max Index Example for DataMap

2018-03-04 Thread jackylk
[CARBONDATA-1480]Min Max Index Example for DataMap

Datamap Example. Implementation of Min Max Index through Datamap. And Using the 
Index while prunning.

This closes #1359


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f9d15a21
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f9d15a21
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f9d15a21

Branch: refs/heads/carbonstore-rebase5
Commit: f9d15a215adc91077f1a6ca6a456e5fce4bc05eb
Parents: dfbdf3d
Author: sounakr 
Authored: Thu Sep 28 16:21:05 2017 +0530
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:12 2018 +0800

--
 .../core/datamap/DataMapStoreManager.java   |  16 +-
 .../carbondata/core/datamap/TableDataMap.java   |  18 +-
 .../carbondata/core/datamap/dev/DataMap.java|  11 +-
 .../core/datamap/dev/DataMapWriter.java |   3 +-
 .../indexstore/SegmentPropertiesFetcher.java|  36 +++
 .../blockletindex/BlockletDataMap.java  |   5 +-
 .../blockletindex/BlockletDataMapFactory.java   |  32 ++-
 datamap/examples/pom.xml| 111 ++
 .../datamap/examples/BlockletMinMax.java|  41 
 .../datamap/examples/MinMaxDataMap.java | 143 
 .../datamap/examples/MinMaxDataMapFactory.java  | 114 ++
 .../datamap/examples/MinMaxDataWriter.java  | 221 +++
 .../examples/MinMaxIndexBlockDetails.java   |  77 +++
 .../MinMaxDataMapExample.scala  |  77 +++
 .../testsuite/datamap/DataMapWriterSuite.scala  |   2 +-
 pom.xml |   2 +
 .../datamap/DataMapWriterListener.java  |   4 +-
 .../store/writer/AbstractFactDataWriter.java|   7 +-
 .../writer/v3/CarbonFactDataWriterImplV3.java   |   3 +
 19 files changed, 894 insertions(+), 29 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/f9d15a21/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
index d30483a..90e5fff 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
@@ -26,6 +26,7 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.datamap.dev.DataMapFactory;
 import org.apache.carbondata.core.indexstore.BlockletDetailsFetcher;
+import org.apache.carbondata.core.indexstore.SegmentPropertiesFetcher;
 import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap;
 import 
org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
@@ -103,7 +104,7 @@ public final class DataMapStoreManager {
   tableDataMaps = new ArrayList<>();
 }
 TableDataMap dataMap = getTableDataMap(dataMapName, tableDataMaps);
-if (dataMap != null) {
+if (dataMap != null && 
dataMap.getDataMapName().equalsIgnoreCase(dataMapName)) {
   throw new RuntimeException("Already datamap exists in that path with 
type " + dataMapName);
 }
 
@@ -113,12 +114,15 @@ public final class DataMapStoreManager {
   DataMapFactory dataMapFactory = factoryClass.newInstance();
   dataMapFactory.init(identifier, dataMapName);
   BlockletDetailsFetcher blockletDetailsFetcher;
+  SegmentPropertiesFetcher segmentPropertiesFetcher = null;
   if (dataMapFactory instanceof BlockletDetailsFetcher) {
 blockletDetailsFetcher = (BlockletDetailsFetcher) dataMapFactory;
   } else {
 blockletDetailsFetcher = getBlockletDetailsFetcher(identifier);
   }
-  dataMap = new TableDataMap(identifier, dataMapName, dataMapFactory, 
blockletDetailsFetcher);
+  segmentPropertiesFetcher = (SegmentPropertiesFetcher) 
blockletDetailsFetcher;
+  dataMap = new TableDataMap(identifier, dataMapName, dataMapFactory, 
blockletDetailsFetcher,
+  segmentPropertiesFetcher);
 } catch (Exception e) {
   LOGGER.error(e);
   throw new RuntimeException(e);
@@ -128,11 +132,11 @@ public final class DataMapStoreManager {
 return dataMap;
   }
 
-  private TableDataMap getTableDataMap(String dataMapName,
-  List tableDataMaps) {
+  private TableDataMap getTableDataMap(String dataMapName, List 
tableDataMaps) {
 TableDataMap dataMap = null;
-for (TableDataMap tableDataMap: tableDataMaps) {
-  if 

[43/50] [abbrv] carbondata git commit: [CARBONDATA-2159] Remove carbon-spark dependency in store-sdk module

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/dcfe73b8/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
index 8d394db..e69de29 100644
--- 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
+++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
@@ -1,610 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.spark.util
-
-import java.text.SimpleDateFormat
-import java.util
-import java.util.{Date, List, Locale}
-
-import scala.collection.{immutable, mutable}
-import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
-
-import org.apache.commons.lang3.StringUtils
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.mapred.JobConf
-import org.apache.hadoop.mapreduce.{TaskAttemptID, TaskType}
-import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
-import org.apache.hadoop.mapreduce.task.{JobContextImpl, 
TaskAttemptContextImpl}
-import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
-import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, 
PartitionedFile}
-import org.apache.spark.sql.util.CarbonException
-import org.apache.spark.sql.util.SparkSQLUtil.sessionState
-
-import org.apache.carbondata.common.constants.LoggerAction
-import org.apache.carbondata.common.logging.{LogService, LogServiceFactory}
-import org.apache.carbondata.core.constants.{CarbonCommonConstants, 
CarbonLoadOptionConstants}
-import org.apache.carbondata.core.indexstore.PartitionSpec
-import org.apache.carbondata.core.locks.{CarbonLockFactory, CarbonLockUtil, 
LockUsage}
-import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
-import org.apache.carbondata.core.metadata.schema.table.CarbonTable
-import org.apache.carbondata.core.statusmanager.{LoadMetadataDetails, 
SegmentStatus, SegmentStatusManager}
-import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
-import 
org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants
-import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat
-import org.apache.carbondata.processing.loading.model.{CarbonDataLoadSchema, 
CarbonLoadModel}
-import org.apache.carbondata.processing.util.{CarbonLoaderUtil, 
DeleteLoadFolders, TableOptionConstant}
-import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
-import org.apache.carbondata.spark.load.DataLoadProcessBuilderOnSpark.LOGGER
-import org.apache.carbondata.spark.load.ValidateUtil
-import org.apache.carbondata.spark.rdd.SerializableConfiguration
-
-/**
- * the util object of data loading
- */
-object DataLoadingUtil {
-
-  val LOGGER: LogService = 
LogServiceFactory.getLogService(this.getClass.getCanonicalName)
-
-  /**
-   * get data loading options and initialise default value
-   */
-  def getDataLoadingOptions(
-  carbonProperty: CarbonProperties,
-  options: immutable.Map[String, String]): mutable.Map[String, String] = {
-val optionsFinal = scala.collection.mutable.Map[String, String]()
-optionsFinal.put("delimiter", options.getOrElse("delimiter", ","))
-optionsFinal.put("quotechar", options.getOrElse("quotechar", "\""))
-optionsFinal.put("fileheader", options.getOrElse("fileheader", ""))
-optionsFinal.put("commentchar", options.getOrElse("commentchar", "#"))
-optionsFinal.put("columndict", options.getOrElse("columndict", null))
-
-optionsFinal.put("escapechar",
-  CarbonLoaderUtil.getEscapeChar(options.getOrElse("escapechar", "\\")))
-
-optionsFinal.put(
-  "serialization_null_format",
-  

[11/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
index 447ab46..547ecaa 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
@@ -22,7 +22,7 @@ import java.util.List;
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
-import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
+import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
 import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
 import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
@@ -35,12 +35,11 @@ import org.apache.carbondata.core.metadata.encoder.Encoding;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
 import org.apache.carbondata.core.scan.expression.Expression;
-import 
org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException;
 import org.apache.carbondata.core.scan.filter.FilterUtil;
 import org.apache.carbondata.core.scan.filter.intf.RowIntf;
 import 
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
 import 
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
-import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
+import org.apache.carbondata.core.scan.processor.RawBlockletColumnChunks;
 import org.apache.carbondata.core.util.BitSetGroup;
 import org.apache.carbondata.core.util.ByteUtil;
 import org.apache.carbondata.core.util.CarbonUtil;
@@ -73,7 +72,7 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends 
RowLevelFilterExecut
   comparator = 
Comparator.getComparatorByDataTypeForMeasure(measure.getDataType());
 }
 ifDefaultValueMatchesFilter();
-if (isDimensionPresentInCurrentBlock[0] == true) {
+if (isDimensionPresentInCurrentBlock[0]) {
   isNaturalSorted = 
dimColEvaluatorInfoList.get(0).getDimension().isUseInvertedIndex()
   && dimColEvaluatorInfoList.get(0).getDimension().isSortColumn();
 }
@@ -120,11 +119,11 @@ public class RowLevelRangeLessThanFiterExecuterImpl 
extends RowLevelFilterExecut
 boolean isScanRequired = false;
 if (isMeasurePresentInCurrentBlock[0] || 
isDimensionPresentInCurrentBlock[0]) {
   if (isMeasurePresentInCurrentBlock[0]) {
-minValue = blockMinValue[measureBlocksIndex[0] + 
lastDimensionColOrdinal];
+minValue = blockMinValue[measureChunkIndex[0] + 
lastDimensionColOrdinal];
 isScanRequired =
 isScanRequired(minValue, msrFilterRangeValues, 
msrColEvalutorInfoList.get(0).getType());
   } else {
-minValue = blockMinValue[dimensionBlocksIndex[0]];
+minValue = blockMinValue[dimensionChunkIndex[0]];
 isScanRequired = isScanRequired(minValue, filterRangeValues);
   }
 } else {
@@ -170,67 +169,69 @@ public class RowLevelRangeLessThanFiterExecuterImpl 
extends RowLevelFilterExecut
   }
 
   @Override
-  public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder, boolean 
useBitsetPipeLine)
-  throws FilterUnsupportedException, IOException {
+  public BitSetGroup applyFilter(RawBlockletColumnChunks 
rawBlockletColumnChunks,
+  boolean useBitsetPipeLine) throws IOException {
 // select all rows if dimension does not exists in the current block
 if (!isDimensionPresentInCurrentBlock[0] && 
!isMeasurePresentInCurrentBlock[0]) {
-  int numberOfRows = blockChunkHolder.getDataBlock().nodeSize();
+  int numberOfRows = rawBlockletColumnChunks.getDataBlock().numRows();
   return FilterUtil
-  
.createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(),
+  
.createBitSetGroupWithDefaultValue(rawBlockletColumnChunks.getDataBlock().numberOfPages(),
   numberOfRows, true);
 }
 if (isDimensionPresentInCurrentBlock[0]) {
-  int blockIndex =
-  
segmentProperties.getDimensionOrdinalToBlockMapping().get(dimensionBlocksIndex[0]);
-  if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) {
-blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = 
blockChunkHolder.getDataBlock()
-

  1   2   >