[35/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-03-04 Thread jackylk
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d85e916
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d85e916
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d85e916

Branch: refs/heads/carbonstore
Commit: 1d85e916f6a0f070960555fb18ee4cd8acbfa315
Parents: 6216294
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index c0cebe0..a7bfdba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb = new StringBuilder("TableBlockInfo{");
-sb.append("filePath='").append(filePath).appe

[35/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-03-04 Thread jackylk
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d85e916
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d85e916
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d85e916

Branch: refs/heads/carbonstore-rebase5
Commit: 1d85e916f6a0f070960555fb18ee4cd8acbfa315
Parents: 6216294
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:32:13 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index c0cebe0..a7bfdba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb = new StringBuilder("TableBlockInfo{");
-sb.append("filePath='").append(filePa

[35/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-03-04 Thread jackylk
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3f1d101d
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3f1d101d
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3f1d101d

Branch: refs/heads/carbonstore-rebase5
Commit: 3f1d101dc79de5680e6104d76625cd4d88d45011
Parents: 880bbce
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 20:04:49 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f1d101d/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f1d101d/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index c0cebe0..a7bfdba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb = new StringBuilder("TableBlockInfo{");
-sb.append("filePath='").append(filePa

[30/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-03-02 Thread jackylk
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/22bb333a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/22bb333a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/22bb333a

Branch: refs/heads/carbonstore-rebase5
Commit: 22bb333a53a6f256248be519874654a794e35d17
Parents: 9062931
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Fri Mar 2 15:52:35 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/22bb333a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/22bb333a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index c0cebe0..a7bfdba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb = new StringBuilder("TableBlockInfo{");
-sb.append("filePath='").append(filePa

[34/49] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-02-28 Thread jackylk
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/22287854
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/22287854
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/22287854

Branch: refs/heads/carbonstore-rebase5
Commit: 222878541155ca1774d61fd6b06d8825543d6fd1
Parents: a889152
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Wed Feb 28 22:05:22 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/22287854/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/22287854/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index c0cebe0..a7bfdba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb = new StringBuilder("TableBlockInfo{");
-sb.append("filePath='").append(fileP

[30/50] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-02-27 Thread jackylk
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/53c9ac7f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/53c9ac7f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/53c9ac7f

Branch: refs/heads/carbonstore-rebase4
Commit: 53c9ac7f69d2061df17f63388e11e9d2d131875a
Parents: 6f41f36
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Tue Feb 27 16:59:48 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/53c9ac7f/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/53c9ac7f/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index c0cebe0..a7bfdba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb = new StringBuilder("TableBlockInfo{");
-sb.append("filePath='").append(fileP

[33/49] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-02-26 Thread jackylk
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7a11f8e5
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7a11f8e5
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7a11f8e5

Branch: refs/heads/carbonstore-rebase4
Commit: 7a11f8e5feda416ee7f14dd2118bff6826789482
Parents: a6bf77f
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Tue Feb 27 09:05:07 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/7a11f8e5/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/7a11f8e5/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index c0cebe0..a7bfdba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb = new StringBuilder("TableBlockInfo{");
-sb.append("filePath='").append(fileP

carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"

2018-02-09 Thread jackylk
Repository: carbondata
Updated Branches:
  refs/heads/carbonstore 6dd8b038f -> e5c32ac96


Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data 
loading"

This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e5c32ac9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e5c32ac9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e5c32ac9

Branch: refs/heads/carbonstore
Commit: e5c32ac96f4cf85ef7a42f2a14c31c19418a789b
Parents: 6dd8b03
Author: Jacky Li 
Authored: Sat Feb 10 10:34:59 2018 +0800
Committer: Jacky Li 
Committed: Sat Feb 10 10:34:59 2018 +0800

--
 .../constants/CarbonLoadOptionConstants.java|  10 -
 .../core/datastore/block/TableBlockInfo.java|  29 --
 .../carbondata/core/util/CarbonProperties.java  |  11 -
 docs/useful-tips-on-carbondata.md   |   1 -
 .../spark/rdd/NewCarbonDataLoadRDD.scala|   4 +-
 .../spark/sql/hive/DistributionUtil.scala   |   2 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|  18 +-
 .../merger/NodeMultiBlockRelation.java  |  40 --
 .../processing/util/CarbonLoaderUtil.java   | 494 +++
 .../processing/util/CarbonLoaderUtilTest.java   | 125 -
 10 files changed, 183 insertions(+), 551 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/e5c32ac9/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
index a6bf60f..bcfeba0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java
@@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants {
*/
   public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000;
 
-  /**
-   * enable block size based block allocation while loading data. By default, 
carbondata assigns
-   * blocks to node based on block number. If this option is set to `true`, 
carbondata will
-   * consider block size first and make sure that all the nodes will process 
almost equal size of
-   * data. This option is especially useful when you encounter skewed data.
-   */
-  @CarbonProperty
-  public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION
-  = "carbon.load.skewedDataOptimization.enabled";
-  public static final String 
ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false";
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e5c32ac9/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index 6624311..907708c 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -94,20 +92,6 @@ public class TableBlockInfo implements Distributable, 
Serializable {
 
   private String dataMapWriterPath;
 
-  /**
-   * comparator to sort by block size in descending order.
-   * Since each line is not exactly the same, the size of a InputSplit may 
differs,
-   * so we allow some deviation for these splits.
-   */
-  public static final Comparator DATA_SIZE_DESC_COMPARATOR =
-  new Comparator() {
-@Override public int compare(Distributable o1, Distributable o2) {
-  long diff =
-  ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) 
o2).getBlockLength();
-  return diff < 0 ? 1 : (diff == 0 ? 0 : -1);
-}
-  };
-
   public TableBlockInfo(String filePath, long blockOffset, String segmentId,
   String[] locations, long blockLength, ColumnarFormatVersion version,
   String[] deletedDeltaFilePath) {
@@ -436,17 +420,4 @@ public class TableBlockInfo implements Distributable, 
Serializable {
   public void setDataMapWriterPath(String dataMapWriterPath) {
 this.dataMapWriterPath = dataMapWriterPath;
   }
-
-  @Override
-  public String toString() {
-final StringBuilder sb