akashrn5 commented on a change in pull request #4020:
URL: https://github.com/apache/carbondata/pull/4020#discussion_r532795309
##########
File path:
core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
##########
@@ -998,6 +998,33 @@ public long getMajorCompactionSize() {
return compactionSize;
}
+ /**
+ * returns minor compaction size value from carbon properties or 0 if it is
not valid or
Review comment:
shouldn't it be -1?, please change it
##########
File path:
core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
##########
@@ -998,6 +998,33 @@ public long getMajorCompactionSize() {
return compactionSize;
}
+ /**
+ * returns minor compaction size value from carbon properties or 0 if it is
not valid or
+ * not configured
+ *
+ * @return compactionSize
+ */
+ public long getMinorCompactionSize() {
+ long compactionSize = -1;
+ try {
+ compactionSize = Long.parseLong(getProperty(
+ CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE, "0"));
+ } catch (NumberFormatException e) {
+ LOGGER.warn("The specified value for property "
+ + CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE + " is
incorrect."
+ + " Correct value should be long value great than 1. Taking the
default value -1" +
+ " which means not consider segment size in minor compaction.");
+ }
+ if (compactionSize <= 0 && compactionSize != -1) {
+ LOGGER.warn("The specified value for property "
+ + CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE + " is
incorrect."
Review comment:
same as above
##########
File path:
core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
##########
@@ -998,6 +998,33 @@ public long getMajorCompactionSize() {
return compactionSize;
}
+ /**
+ * returns minor compaction size value from carbon properties or 0 if it is
not valid or
+ * not configured
+ *
+ * @return compactionSize
+ */
+ public long getMinorCompactionSize() {
+ long compactionSize = -1;
+ try {
+ compactionSize = Long.parseLong(getProperty(
+ CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE, "0"));
+ } catch (NumberFormatException e) {
+ LOGGER.warn("The specified value for property "
+ + CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE + " is
incorrect."
+ + " Correct value should be long value great than 1. Taking the
default value -1" +
+ " which means not consider segment size in minor compaction.");
Review comment:
Please change as below:
`Invalid value is configured for property
CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE, considering the default
value -1 and not considering segment Size during minor compaction.`
##########
File path:
integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala
##########
@@ -292,6 +293,33 @@ object CommonUtil {
}
}
+ /**
+ * This method will validate the minor compaction size specified by the user
+ * the property is used while doing minor compaction
+ *
+ * @param tableProperties
Review comment:
remove this, not needed as variable name clearly tells what it is
##########
File path:
integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala
##########
@@ -292,6 +293,33 @@ object CommonUtil {
}
}
+ /**
+ * This method will validate the minor compaction size specified by the user
+ * the property is used while doing minor compaction
+ *
+ * @param tableProperties
+ */
+ def validateMinorCompactionSize(tableProperties: Map[String, String]): Unit
= {
+ var minorCompactionSize: Integer = 0
+ val tblPropName = CarbonCommonConstants.TABLE_MINOR_COMPACTION_SIZE
Review comment:
please give a better variable name
##########
File path:
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##########
@@ -767,18 +784,27 @@ private static boolean isMergedSegment(String segName) {
public static long getCompactionSize(CompactionType compactionType,
CarbonLoadModel carbonLoadModel) {
long compactionSize = 0;
+ Map<String, String> tblProps = carbonLoadModel.getCarbonDataLoadSchema()
+
.getCarbonTable().getTableInfo().getFactTable().getTableProperties();
switch (compactionType) {
case MAJOR:
// default value is system level option
compactionSize =
CarbonProperties.getInstance().getMajorCompactionSize();
// if table level option is identified, use it to overwrite system
level option
- Map<String, String> tblProps =
carbonLoadModel.getCarbonDataLoadSchema()
-
.getCarbonTable().getTableInfo().getFactTable().getTableProperties();
if
(tblProps.containsKey(CarbonCommonConstants.TABLE_MAJOR_COMPACTION_SIZE)) {
compactionSize = Long.parseLong(
tblProps.get(CarbonCommonConstants.TABLE_MAJOR_COMPACTION_SIZE));
}
break;
+ case MINOR:
+ // default value is system level option
+ compactionSize =
CarbonProperties.getInstance().getMinorCompactionSize();
Review comment:
you can move this to else block
##########
File path: docs/dml-of-carbondata.md
##########
@@ -529,6 +529,10 @@ CarbonData DML statements are documented here,which
includes:
* Level 1: Merging of the segments which are not yet compacted.
* Level 2: Merging of the compacted segments again to form a larger segment.
+ The segment whose data size exceed limit of carbon.minor.compaction.size
will not be included in
+ minor compaction. If user want to control the size of segment included in
minor compaction,
+ configure the property with appropriate value in MB, if not configure, will
merge segments only
+ based on number of segments.
Review comment:
change to below
`User can control the size of a segment to be included in the minor
compaction by using carbon.minor.compaction.size. If not configured, minor
compaction will consider the segments based on
carbon.compaction.level.threshold by neglecting the size of each segment`
##########
File path: docs/dml-of-carbondata.md
##########
@@ -529,6 +529,10 @@ CarbonData DML statements are documented here,which
includes:
* Level 1: Merging of the segments which are not yet compacted.
* Level 2: Merging of the compacted segments again to form a larger segment.
+ The segment whose data size exceed limit of carbon.minor.compaction.size
will not be included in
Review comment:
```suggestion
The segment whose data size exceed the limit of
carbon.minor.compaction.size will not be included in
```
##########
File path:
integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala
##########
@@ -292,6 +293,33 @@ object CommonUtil {
}
}
+ /**
+ * This method will validate the minor compaction size specified by the user
+ * the property is used while doing minor compaction
+ *
+ * @param tableProperties
+ */
+ def validateMinorCompactionSize(tableProperties: Map[String, String]): Unit
= {
+ var minorCompactionSize: Integer = 0
+ val tblPropName = CarbonCommonConstants.TABLE_MINOR_COMPACTION_SIZE
+ if (tableProperties.get(tblPropName).isDefined) {
+ val minorCompactionSizeStr: String =
+ parsePropertyValueStringInMB(tableProperties(tblPropName))
+ try {
+ minorCompactionSize = Integer.parseInt(minorCompactionSizeStr)
Review comment:
no need of 306 and 307, directly add that in 309 inside parseInt
##########
File path:
integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala
##########
@@ -292,6 +293,33 @@ object CommonUtil {
}
}
+ /**
+ * This method will validate the minor compaction size specified by the user
+ * the property is used while doing minor compaction
+ *
+ * @param tableProperties
+ */
+ def validateMinorCompactionSize(tableProperties: Map[String, String]): Unit
= {
+ var minorCompactionSize: Integer = 0
+ val tblPropName = CarbonCommonConstants.TABLE_MINOR_COMPACTION_SIZE
+ if (tableProperties.get(tblPropName).isDefined) {
+ val minorCompactionSizeStr: String =
+ parsePropertyValueStringInMB(tableProperties(tblPropName))
+ try {
+ minorCompactionSize = Integer.parseInt(minorCompactionSizeStr)
+ } catch {
+ case e: NumberFormatException =>
+ throw new MalformedCarbonCommandException(s"Invalid $tblPropName
value found: " +
+ s"$minorCompactionSizeStr, only int value greater than 0 is
supported.")
+ }
+ if (minorCompactionSize <= 0) {
+ throw new MalformedCarbonCommandException(s"Invalid $tblPropName value
found: " +
+ s"$minorCompactionSizeStr, only int value greater than 0 is
supported.")
Review comment:
Please change like below
`Invalid value $minorCompactionSizeStr configured for $tblPropName. Please
consider configuring value greater than 0`
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]