This is an automated email from the ASF dual-hosted git repository.
kumarvishal09 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new da525ec [CARBONDATA-3515] Limit local dictionary size to 16MB and
allow configuration.
da525ec is described below
commit da525ece20f6606f8b2113ca32b7acb82f0698fd
Author: ajantha-bhat <[email protected]>
AuthorDate: Tue Sep 10 10:48:26 2019 +0530
[CARBONDATA-3515] Limit local dictionary size to 16MB and allow
configuration.
Problem: Currently the maximum local dictionary size is 2GB. Because of this,
for varchar columns or long string columns,
the local dictionary can grow up to 2GB. Since the local dictionary is stored
in the blocklet, the blocklet size can exceed 2GB,
even though the configured maximum blocklet size is 64MB. In some places
integer overflow also happens during casting.
Solution: Limit the local dictionary size to a maximum of 16MB and make it
configurable. The default size is 4MB.
This closes #3380
---
.../core/constants/CarbonCommonConstants.java | 11 ++++++
.../dictionaryholder/MapBasedDictionaryStore.java | 16 ++++++--
.../carbondata/core/util/CarbonProperties.java | 43 ++++++++++++++++++++++
docs/configuration-parameters.md | 1 +
4 files changed, 68 insertions(+), 3 deletions(-)
diff --git
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 67fa13f..ac77582 100644
---
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -1209,6 +1209,17 @@ public final class CarbonCommonConstants {
public static final String CARBON_ENABLE_RANGE_COMPACTION_DEFAULT = "true";
+ @CarbonProperty
+ /**
+ * size based threshold for local dictionary in mb.
+ */
+ public static final String CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB =
+ "carbon.local.dictionary.size.threshold.inmb";
+
+ public static final int CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_DEFAULT
= 4;
+
+ public static final int CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_MAX =
16;
+
//////////////////////////////////////////////////////////////////////////////////////////
// Query parameter start here
//////////////////////////////////////////////////////////////////////////////////////////
diff --git
a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
index 7b8617a..0a50451 100644
---
a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
+++
b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
@@ -20,7 +20,9 @@ import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
import
org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException;
+import org.apache.carbondata.core.util.CarbonProperties;
/**
* Map based dictionary holder class, it will use map to hold
@@ -51,6 +53,11 @@ public class MapBasedDictionaryStore implements
DictionaryStore {
private int dictionaryThreshold;
/**
+ * dictionary threshold size in bytes
+ */
+ private long dictionarySizeThresholdInBytes;
+
+ /**
* for checking threshold is reached or not
*/
private boolean isThresholdReached;
@@ -62,6 +69,8 @@ public class MapBasedDictionaryStore implements
DictionaryStore {
public MapBasedDictionaryStore(int dictionaryThreshold) {
this.dictionaryThreshold = dictionaryThreshold;
+ this.dictionarySizeThresholdInBytes =
Integer.parseInt(CarbonProperties.getInstance()
+
.getProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB))
<< 20;
this.dictionary = new ConcurrentHashMap<>();
this.referenceDictionaryArray = new
DictionaryByteArrayWrapper[dictionaryThreshold];
}
@@ -93,7 +102,7 @@ public class MapBasedDictionaryStore implements
DictionaryStore {
value = ++lastAssignValue;
currentSize += data.length;
// if new value is greater than threshold
- if (value > dictionaryThreshold || currentSize >= Integer.MAX_VALUE)
{
+ if (value > dictionaryThreshold || currentSize >
dictionarySizeThresholdInBytes) {
// set the threshold boolean to true
isThresholdReached = true;
// throw exception
@@ -111,9 +120,10 @@ public class MapBasedDictionaryStore implements
DictionaryStore {
private void checkIfThresholdReached() throws
DictionaryThresholdReachedException {
if (isThresholdReached) {
- if (currentSize >= Integer.MAX_VALUE) {
+ if (currentSize > dictionarySizeThresholdInBytes) {
throw new DictionaryThresholdReachedException(
- "Unable to generate dictionary. Dictionary Size crossed 2GB
limit");
+ "Unable to generate dictionary. Dictionary Size crossed bytes: "
+ + dictionarySizeThresholdInBytes);
} else {
throw new DictionaryThresholdReachedException(
"Unable to generate dictionary value. Dictionary threshold
reached");
diff --git
a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
index adf4905..e4efc0b 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
@@ -202,6 +202,9 @@ public final class CarbonProperties {
case CarbonCommonConstants.CARBON_INDEX_SERVER_SERIALIZATION_THRESHOLD:
validateIndexServerSerializationThreshold();
break;
+ case CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB:
+ validateAndGetLocalDictionarySizeThresholdInMB();
+ break;
// TODO : Validation for carbon.lock.type should be handled for
addProperty flow
default:
// none
@@ -268,6 +271,7 @@ public final class CarbonProperties {
validateStringCharacterLimit();
validateDetailQueryBatchSize();
validateIndexServerSerializationThreshold();
+ validateAndGetLocalDictionarySizeThresholdInMB();
}
/**
@@ -1789,4 +1793,43 @@ public final class CarbonProperties {
return !prefetchEnable.equalsIgnoreCase("false");
}
}
+
+ /**
+ * get local dictionary size threshold in mb.
+ */
+ private void validateAndGetLocalDictionarySizeThresholdInMB() {
+ String sizeStr = carbonProperties
+
.getProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB);
+ String defaultValue = Integer
+
.toString(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_DEFAULT);
+ if (sizeStr == null) {
+ carbonProperties
+
.setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+ defaultValue);
+ } else {
+ try {
+ int size = Integer.parseInt(sizeStr);
+ if (size < 0 || size == 0
+ || size >
CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_MAX) {
+ LOGGER.info("using default value of
carbon.local.dictionary.size.threshold.inmb = "
+ + defaultValue);
+ carbonProperties
+
.setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+ defaultValue);
+ } else {
+ LOGGER.info("using carbon.local.dictionary.size.threshold.inmb = " +
size);
+ carbonProperties
+
.setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+ Integer.toString(size));
+ }
+ } catch (Exception ex) {
+ LOGGER.info(
+ "using default value of
carbon.local.dictionary.size.threshold.inmb = " + defaultValue);
+ carbonProperties
+
.setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+ defaultValue);
+ }
+ }
+ }
+
}
diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md
index da226ec..51017fe 100644
--- a/docs/configuration-parameters.md
+++ b/docs/configuration-parameters.md
@@ -96,6 +96,7 @@ This section provides the details of all the configurations
required for the Car
| carbon.minmax.allowed.byte.count | 200 | CarbonData will write the min max
values for string/varchar types column using the byte count specified by this
configuration. Max value is 1000 bytes(500 characters) and Min value is 10
bytes(5 characters). **NOTE:** This property is useful for reducing the store
size thereby improving the query performance but can lead to query degradation
if value is not configured properly. | |
| carbon.merge.index.failure.throw.exception | true | It is used to configure
whether or not merge index failure should result in data load failure also. |
| carbon.binary.decoder | None | Support configurable decode for loading. Two
decoders supported: base64 and hex |
+| carbon.local.dictionary.size.threshold.inmb | 4 | Size-based threshold for the
local dictionary, in MB. The maximum allowed value is 16 MB. |
## Compaction Configuration