[35/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading" This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d85e916 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d85e916 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d85e916 Branch: refs/heads/carbonstore Commit: 1d85e916f6a0f070960555fb18ee4cd8acbfa315 Parents: 6216294 Author: Jacky Li Authored: Sat Feb 10 10:34:59 2018 +0800 Committer: Jacky Li Committed: Sun Mar 4 20:32:13 2018 +0800 -- .../constants/CarbonLoadOptionConstants.java| 10 - .../core/datastore/block/TableBlockInfo.java| 29 -- .../carbondata/core/util/CarbonProperties.java | 11 - docs/useful-tips-on-carbondata.md | 1 - .../spark/rdd/NewCarbonDataLoadRDD.scala| 4 +- .../spark/sql/hive/DistributionUtil.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 18 +- .../merger/NodeMultiBlockRelation.java | 40 -- .../processing/util/CarbonLoaderUtil.java | 494 +++ .../processing/util/CarbonLoaderUtilTest.java | 125 - 10 files changed, 183 insertions(+), 551 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java index a6bf60f..bcfeba0 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java @@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants { */ public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000; - /** - * enable block size based block allocation while loading data. By default, carbondata assigns - * blocks to node based on block number. If this option is set to `true`, carbondata will - * consider block size first and make sure that all the nodes will process almost equal size of - * data. This option is especially useful when you encounter skewed data. - */ - @CarbonProperty - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION - = "carbon.load.skewedDataOptimization.enabled"; - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false"; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index c0cebe0..a7bfdba 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block; import java.io.IOException; import java.io.Serializable; import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; @@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, Serializable { private String dataMapWriterPath; - /** - * comparator to sort by block size in descending order. - * Since each line is not exactly the same, the size of a InputSplit may differs, - * so we allow some deviation for these splits. - */ - public static final Comparator DATA_SIZE_DESC_COMPARATOR = - new Comparator() { -@Override public int compare(Distributable o1, Distributable o2) { - long diff = - ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) o2).getBlockLength(); - return diff < 0 ? 1 : (diff == 0 ? 0 : -1); -} - }; - public TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations, long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) { @@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, Serializable { public void setDataMapWriterPath(String dataMapWriterPath) { this.dataMapWriterPath = dataMapWriterPath; } - - @Override - public String toString() { -final StringBuilder sb = new StringBuilder("TableBlockInfo{"); -sb.append("filePath='").append(filePath).appe
[35/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading" This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d85e916 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d85e916 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d85e916 Branch: refs/heads/carbonstore-rebase5 Commit: 1d85e916f6a0f070960555fb18ee4cd8acbfa315 Parents: 6216294 Author: Jacky Li Authored: Sat Feb 10 10:34:59 2018 +0800 Committer: Jacky Li Committed: Sun Mar 4 20:32:13 2018 +0800 -- .../constants/CarbonLoadOptionConstants.java| 10 - .../core/datastore/block/TableBlockInfo.java| 29 -- .../carbondata/core/util/CarbonProperties.java | 11 - docs/useful-tips-on-carbondata.md | 1 - .../spark/rdd/NewCarbonDataLoadRDD.scala| 4 +- .../spark/sql/hive/DistributionUtil.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 18 +- .../merger/NodeMultiBlockRelation.java | 40 -- .../processing/util/CarbonLoaderUtil.java | 494 +++ .../processing/util/CarbonLoaderUtilTest.java | 125 - 10 files changed, 183 insertions(+), 551 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java index a6bf60f..bcfeba0 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java @@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants { */ public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000; - /** - * enable block size based block allocation while loading data. By default, carbondata assigns - * blocks to node based on block number. If this option is set to `true`, carbondata will - * consider block size first and make sure that all the nodes will process almost equal size of - * data. This option is especially useful when you encounter skewed data. - */ - @CarbonProperty - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION - = "carbon.load.skewedDataOptimization.enabled"; - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false"; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d85e916/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index c0cebe0..a7bfdba 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block; import java.io.IOException; import java.io.Serializable; import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; @@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, Serializable { private String dataMapWriterPath; - /** - * comparator to sort by block size in descending order. - * Since each line is not exactly the same, the size of a InputSplit may differs, - * so we allow some deviation for these splits. - */ - public static final Comparator DATA_SIZE_DESC_COMPARATOR = - new Comparator() { -@Override public int compare(Distributable o1, Distributable o2) { - long diff = - ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) o2).getBlockLength(); - return diff < 0 ? 1 : (diff == 0 ? 0 : -1); -} - }; - public TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations, long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) { @@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, Serializable { public void setDataMapWriterPath(String dataMapWriterPath) { this.dataMapWriterPath = dataMapWriterPath; } - - @Override - public String toString() { -final StringBuilder sb = new StringBuilder("TableBlockInfo{"); -sb.append("filePath='").append(filePa
[35/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading" This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3f1d101d Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3f1d101d Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3f1d101d Branch: refs/heads/carbonstore-rebase5 Commit: 3f1d101dc79de5680e6104d76625cd4d88d45011 Parents: 880bbce Author: Jacky Li Authored: Sat Feb 10 10:34:59 2018 +0800 Committer: Jacky Li Committed: Sun Mar 4 20:04:49 2018 +0800 -- .../constants/CarbonLoadOptionConstants.java| 10 - .../core/datastore/block/TableBlockInfo.java| 29 -- .../carbondata/core/util/CarbonProperties.java | 11 - docs/useful-tips-on-carbondata.md | 1 - .../spark/rdd/NewCarbonDataLoadRDD.scala| 4 +- .../spark/sql/hive/DistributionUtil.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 18 +- .../merger/NodeMultiBlockRelation.java | 40 -- .../processing/util/CarbonLoaderUtil.java | 494 +++ .../processing/util/CarbonLoaderUtilTest.java | 125 - 10 files changed, 183 insertions(+), 551 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f1d101d/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java index a6bf60f..bcfeba0 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java @@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants { */ public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000; - /** - * enable block size based block allocation while loading data. By default, carbondata assigns - * blocks to node based on block number. If this option is set to `true`, carbondata will - * consider block size first and make sure that all the nodes will process almost equal size of - * data. This option is especially useful when you encounter skewed data. - */ - @CarbonProperty - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION - = "carbon.load.skewedDataOptimization.enabled"; - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false"; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f1d101d/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index c0cebe0..a7bfdba 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block; import java.io.IOException; import java.io.Serializable; import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; @@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, Serializable { private String dataMapWriterPath; - /** - * comparator to sort by block size in descending order. - * Since each line is not exactly the same, the size of a InputSplit may differs, - * so we allow some deviation for these splits. - */ - public static final Comparator DATA_SIZE_DESC_COMPARATOR = - new Comparator() { -@Override public int compare(Distributable o1, Distributable o2) { - long diff = - ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) o2).getBlockLength(); - return diff < 0 ? 1 : (diff == 0 ? 0 : -1); -} - }; - public TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations, long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) { @@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, Serializable { public void setDataMapWriterPath(String dataMapWriterPath) { this.dataMapWriterPath = dataMapWriterPath; } - - @Override - public String toString() { -final StringBuilder sb = new StringBuilder("TableBlockInfo{"); -sb.append("filePath='").append(filePa
[30/50] [abbrv] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading" This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/22bb333a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/22bb333a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/22bb333a Branch: refs/heads/carbonstore-rebase5 Commit: 22bb333a53a6f256248be519874654a794e35d17 Parents: 9062931 Author: Jacky Li Authored: Sat Feb 10 10:34:59 2018 +0800 Committer: Jacky Li Committed: Fri Mar 2 15:52:35 2018 +0800 -- .../constants/CarbonLoadOptionConstants.java| 10 - .../core/datastore/block/TableBlockInfo.java| 29 -- .../carbondata/core/util/CarbonProperties.java | 11 - docs/useful-tips-on-carbondata.md | 1 - .../spark/rdd/NewCarbonDataLoadRDD.scala| 4 +- .../spark/sql/hive/DistributionUtil.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 18 +- .../merger/NodeMultiBlockRelation.java | 40 -- .../processing/util/CarbonLoaderUtil.java | 494 +++ .../processing/util/CarbonLoaderUtilTest.java | 125 - 10 files changed, 183 insertions(+), 551 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/22bb333a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java index a6bf60f..bcfeba0 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java @@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants { */ public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000; - /** - * enable block size based block allocation while loading data. By default, carbondata assigns - * blocks to node based on block number. If this option is set to `true`, carbondata will - * consider block size first and make sure that all the nodes will process almost equal size of - * data. This option is especially useful when you encounter skewed data. - */ - @CarbonProperty - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION - = "carbon.load.skewedDataOptimization.enabled"; - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false"; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/22bb333a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index c0cebe0..a7bfdba 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block; import java.io.IOException; import java.io.Serializable; import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; @@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, Serializable { private String dataMapWriterPath; - /** - * comparator to sort by block size in descending order. - * Since each line is not exactly the same, the size of a InputSplit may differs, - * so we allow some deviation for these splits. - */ - public static final Comparator DATA_SIZE_DESC_COMPARATOR = - new Comparator() { -@Override public int compare(Distributable o1, Distributable o2) { - long diff = - ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) o2).getBlockLength(); - return diff < 0 ? 1 : (diff == 0 ? 0 : -1); -} - }; - public TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations, long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) { @@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, Serializable { public void setDataMapWriterPath(String dataMapWriterPath) { this.dataMapWriterPath = dataMapWriterPath; } - - @Override - public String toString() { -final StringBuilder sb = new StringBuilder("TableBlockInfo{"); -sb.append("filePath='").append(filePa
[34/49] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading" This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/22287854 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/22287854 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/22287854 Branch: refs/heads/carbonstore-rebase5 Commit: 222878541155ca1774d61fd6b06d8825543d6fd1 Parents: a889152 Author: Jacky Li Authored: Sat Feb 10 10:34:59 2018 +0800 Committer: Jacky Li Committed: Wed Feb 28 22:05:22 2018 +0800 -- .../constants/CarbonLoadOptionConstants.java| 10 - .../core/datastore/block/TableBlockInfo.java| 29 -- .../carbondata/core/util/CarbonProperties.java | 11 - docs/useful-tips-on-carbondata.md | 1 - .../spark/rdd/NewCarbonDataLoadRDD.scala| 4 +- .../spark/sql/hive/DistributionUtil.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 18 +- .../merger/NodeMultiBlockRelation.java | 40 -- .../processing/util/CarbonLoaderUtil.java | 494 +++ .../processing/util/CarbonLoaderUtilTest.java | 125 - 10 files changed, 183 insertions(+), 551 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/22287854/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java index a6bf60f..bcfeba0 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java @@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants { */ public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000; - /** - * enable block size based block allocation while loading data. By default, carbondata assigns - * blocks to node based on block number. If this option is set to `true`, carbondata will - * consider block size first and make sure that all the nodes will process almost equal size of - * data. This option is especially useful when you encounter skewed data. - */ - @CarbonProperty - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION - = "carbon.load.skewedDataOptimization.enabled"; - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false"; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/22287854/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index c0cebe0..a7bfdba 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block; import java.io.IOException; import java.io.Serializable; import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; @@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, Serializable { private String dataMapWriterPath; - /** - * comparator to sort by block size in descending order. - * Since each line is not exactly the same, the size of a InputSplit may differs, - * so we allow some deviation for these splits. - */ - public static final Comparator DATA_SIZE_DESC_COMPARATOR = - new Comparator() { -@Override public int compare(Distributable o1, Distributable o2) { - long diff = - ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) o2).getBlockLength(); - return diff < 0 ? 1 : (diff == 0 ? 0 : -1); -} - }; - public TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations, long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) { @@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, Serializable { public void setDataMapWriterPath(String dataMapWriterPath) { this.dataMapWriterPath = dataMapWriterPath; } - - @Override - public String toString() { -final StringBuilder sb = new StringBuilder("TableBlockInfo{"); -sb.append("filePath='").append(fileP
[30/50] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading" This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/53c9ac7f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/53c9ac7f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/53c9ac7f Branch: refs/heads/carbonstore-rebase4 Commit: 53c9ac7f69d2061df17f63388e11e9d2d131875a Parents: 6f41f36 Author: Jacky Li Authored: Sat Feb 10 10:34:59 2018 +0800 Committer: Jacky Li Committed: Tue Feb 27 16:59:48 2018 +0800 -- .../constants/CarbonLoadOptionConstants.java| 10 - .../core/datastore/block/TableBlockInfo.java| 29 -- .../carbondata/core/util/CarbonProperties.java | 11 - docs/useful-tips-on-carbondata.md | 1 - .../spark/rdd/NewCarbonDataLoadRDD.scala| 4 +- .../spark/sql/hive/DistributionUtil.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 18 +- .../merger/NodeMultiBlockRelation.java | 40 -- .../processing/util/CarbonLoaderUtil.java | 494 +++ .../processing/util/CarbonLoaderUtilTest.java | 125 - 10 files changed, 183 insertions(+), 551 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/53c9ac7f/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java index a6bf60f..bcfeba0 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java @@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants { */ public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000; - /** - * enable block size based block allocation while loading data. By default, carbondata assigns - * blocks to node based on block number. If this option is set to `true`, carbondata will - * consider block size first and make sure that all the nodes will process almost equal size of - * data. This option is especially useful when you encounter skewed data. - */ - @CarbonProperty - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION - = "carbon.load.skewedDataOptimization.enabled"; - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false"; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/53c9ac7f/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index c0cebe0..a7bfdba 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block; import java.io.IOException; import java.io.Serializable; import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; @@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, Serializable { private String dataMapWriterPath; - /** - * comparator to sort by block size in descending order. - * Since each line is not exactly the same, the size of a InputSplit may differs, - * so we allow some deviation for these splits. - */ - public static final Comparator DATA_SIZE_DESC_COMPARATOR = - new Comparator() { -@Override public int compare(Distributable o1, Distributable o2) { - long diff = - ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) o2).getBlockLength(); - return diff < 0 ? 1 : (diff == 0 ? 0 : -1); -} - }; - public TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations, long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) { @@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, Serializable { public void setDataMapWriterPath(String dataMapWriterPath) { this.dataMapWriterPath = dataMapWriterPath; } - - @Override - public String toString() { -final StringBuilder sb = new StringBuilder("TableBlockInfo{"); -sb.append("filePath='").append(fileP
[33/49] carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"
Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading" This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7a11f8e5 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7a11f8e5 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7a11f8e5 Branch: refs/heads/carbonstore-rebase4 Commit: 7a11f8e5feda416ee7f14dd2118bff6826789482 Parents: a6bf77f Author: Jacky Li Authored: Sat Feb 10 10:34:59 2018 +0800 Committer: Jacky Li Committed: Tue Feb 27 09:05:07 2018 +0800 -- .../constants/CarbonLoadOptionConstants.java| 10 - .../core/datastore/block/TableBlockInfo.java| 29 -- .../carbondata/core/util/CarbonProperties.java | 11 - docs/useful-tips-on-carbondata.md | 1 - .../spark/rdd/NewCarbonDataLoadRDD.scala| 4 +- .../spark/sql/hive/DistributionUtil.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 18 +- .../merger/NodeMultiBlockRelation.java | 40 -- .../processing/util/CarbonLoaderUtil.java | 494 +++ .../processing/util/CarbonLoaderUtilTest.java | 125 - 10 files changed, 183 insertions(+), 551 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/7a11f8e5/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java index a6bf60f..bcfeba0 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java @@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants { */ public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000; - /** - * enable block size based block allocation while loading data. By default, carbondata assigns - * blocks to node based on block number. If this option is set to `true`, carbondata will - * consider block size first and make sure that all the nodes will process almost equal size of - * data. This option is especially useful when you encounter skewed data. - */ - @CarbonProperty - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION - = "carbon.load.skewedDataOptimization.enabled"; - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false"; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/7a11f8e5/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index c0cebe0..a7bfdba 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block; import java.io.IOException; import java.io.Serializable; import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; @@ -100,20 +98,6 @@ public class TableBlockInfo implements Distributable, Serializable { private String dataMapWriterPath; - /** - * comparator to sort by block size in descending order. - * Since each line is not exactly the same, the size of a InputSplit may differs, - * so we allow some deviation for these splits. - */ - public static final Comparator DATA_SIZE_DESC_COMPARATOR = - new Comparator() { -@Override public int compare(Distributable o1, Distributable o2) { - long diff = - ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) o2).getBlockLength(); - return diff < 0 ? 1 : (diff == 0 ? 0 : -1); -} - }; - public TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations, long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) { @@ -450,17 +434,4 @@ public class TableBlockInfo implements Distributable, Serializable { public void setDataMapWriterPath(String dataMapWriterPath) { this.dataMapWriterPath = dataMapWriterPath; } - - @Override - public String toString() { -final StringBuilder sb = new StringBuilder("TableBlockInfo{"); -sb.append("filePath='").append(fileP
carbondata git commit: Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading"
Repository: carbondata Updated Branches: refs/heads/carbonstore 6dd8b038f -> e5c32ac96 Revert "[CARBONDATA-2023][DataLoad] Add size base block allocation in data loading" This reverts commit 6dd8b038fc898dbf48ad30adfc870c19eb38e3d0. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e5c32ac9 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e5c32ac9 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e5c32ac9 Branch: refs/heads/carbonstore Commit: e5c32ac96f4cf85ef7a42f2a14c31c19418a789b Parents: 6dd8b03 Author: Jacky Li Authored: Sat Feb 10 10:34:59 2018 +0800 Committer: Jacky Li Committed: Sat Feb 10 10:34:59 2018 +0800 -- .../constants/CarbonLoadOptionConstants.java| 10 - .../core/datastore/block/TableBlockInfo.java| 29 -- .../carbondata/core/util/CarbonProperties.java | 11 - docs/useful-tips-on-carbondata.md | 1 - .../spark/rdd/NewCarbonDataLoadRDD.scala| 4 +- .../spark/sql/hive/DistributionUtil.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 18 +- .../merger/NodeMultiBlockRelation.java | 40 -- .../processing/util/CarbonLoaderUtil.java | 494 +++ .../processing/util/CarbonLoaderUtilTest.java | 125 - 10 files changed, 183 insertions(+), 551 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/e5c32ac9/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java index a6bf60f..bcfeba0 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java @@ -114,14 +114,4 @@ public final class CarbonLoadOptionConstants { */ public static final int MAX_EXTERNAL_DICTIONARY_SIZE = 1000; - /** - * enable block size based block allocation while loading data. By default, carbondata assigns - * blocks to node based on block number. If this option is set to `true`, carbondata will - * consider block size first and make sure that all the nodes will process almost equal size of - * data. This option is especially useful when you encounter skewed data. - */ - @CarbonProperty - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION - = "carbon.load.skewedDataOptimization.enabled"; - public static final String ENABLE_CARBON_LOAD_SKEWED_DATA_OPTIMIZATION_DEFAULT = "false"; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/e5c32ac9/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index 6624311..907708c 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -19,8 +19,6 @@ package org.apache.carbondata.core.datastore.block; import java.io.IOException; import java.io.Serializable; import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; @@ -94,20 +92,6 @@ public class TableBlockInfo implements Distributable, Serializable { private String dataMapWriterPath; - /** - * comparator to sort by block size in descending order. - * Since each line is not exactly the same, the size of a InputSplit may differs, - * so we allow some deviation for these splits. - */ - public static final Comparator DATA_SIZE_DESC_COMPARATOR = - new Comparator() { -@Override public int compare(Distributable o1, Distributable o2) { - long diff = - ((TableBlockInfo) o1).getBlockLength() - ((TableBlockInfo) o2).getBlockLength(); - return diff < 0 ? 1 : (diff == 0 ? 0 : -1); -} - }; - public TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations, long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) { @@ -436,17 +420,4 @@ public class TableBlockInfo implements Distributable, Serializable { public void setDataMapWriterPath(String dataMapWriterPath) { this.dataMapWriterPath = dataMapWriterPath; } - - @Override - public String toString() { -final StringBuilder sb