Repository: incubator-carbondata Updated Branches: refs/heads/master 759e76d26 -> 64615f100
Update DOC about table level blocksize Supprot MB/M for table_block_size Update doc Fix style Fix style Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/ce7a1c5b Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/ce7a1c5b Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/ce7a1c5b Branch: refs/heads/master Commit: ce7a1c5bfc034806ef8e6ed0df469a844d228059 Parents: 759e76d Author: Zhangshunyu <zhangshu...@huawei.com> Authored: Sat Oct 8 15:28:24 2016 +0800 Committer: jackylk <jacky.li...@huawei.com> Committed: Sun Oct 9 22:34:44 2016 +0800 ---------------------------------------------------------------------- docs/DDL-Operations-on-Carbon.md | 8 +++++ .../carbondata/spark/util/CommonUtil.scala | 33 +++++++++++++++++--- .../org/apache/spark/sql/CarbonSqlParser.scala | 1 + .../execution/command/carbonTableSchema.scala | 3 +- .../dataload/TestTableLevelBlockSize.scala | 10 +++--- .../apache/spark/sql/TestCarbonSqlParser.scala | 2 ++ 6 files changed, 46 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ce7a1c5b/docs/DDL-Operations-on-Carbon.md ---------------------------------------------------------------------- diff --git a/docs/DDL-Operations-on-Carbon.md b/docs/DDL-Operations-on-Carbon.md index da3a44f..07b2be3 100644 --- a/docs/DDL-Operations-on-Carbon.md +++ b/docs/DDL-Operations-on-Carbon.md @@ -67,6 +67,14 @@ Here, DICTIONARY_EXCLUDE will exclude dictionary creation. 
This is applicable for ```ruby TBLPROPERTIES ("COLUMN_GROUPS"="(column1,column3),(Column4,Column5,Column6)") ``` + - **Table Block Size Configuration** + + The block size of one table's files on hdfs can be defined using an int value whose size is in MB, the range is from 1 MB to 2048 MB and the default value is 1024 MB; if the user does not define this value in the DDL, the default value is used. + + ```ruby + TBLPROPERTIES ("TABLE_BLOCKSIZE"="512 MB") + ``` +Here 512 MB means the block size of this table is 512 MB; the user can also set it as 512M or 512. - **Inverted Index Configuration** Inverted index is very useful to improve compression ratio and query speed, especially for those low-cardinality columns that are in reward position. http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ce7a1c5b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala index 418da4c..8b89f5d 100644 --- a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala +++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala @@ -19,6 +19,8 @@ package org.apache.carbondata.spark.util import java.util import java.util.UUID +import scala.collection.mutable.Map + import org.apache.spark.sql.execution.command.{ColumnProperty, Field} import org.apache.carbondata.core.carbon.metadata.datatype.DataType @@ -208,21 +210,42 @@ object CommonUtil { def validateTableBlockSize(tableProperties: Map[String, String]): Unit = { var tableBlockSize: Integer = 0 if (tableProperties.get(CarbonCommonConstants.TABLE_BLOCKSIZE).isDefined) { - val blockSizeStr: String = tableProperties.get(CarbonCommonConstants.TABLE_BLOCKSIZE).get + val blockSizeStr: String = + 
parsePropertyValueStringInMB(tableProperties.get(CarbonCommonConstants.TABLE_BLOCKSIZE).get) try { tableBlockSize = Integer.parseInt(blockSizeStr) } catch { case e: NumberFormatException => throw new MalformedCarbonCommandException("Invalid table_blocksize value found: " + - s"$blockSizeStr, only int value from 1 to " + - s"2048 is supported.") + s"$blockSizeStr, only int value from 1 MB to " + + s"2048 MB is supported.") } if (tableBlockSize < CarbonCommonConstants.BLOCK_SIZE_MIN_VAL || tableBlockSize > CarbonCommonConstants.BLOCK_SIZE_MAX_VAL) { throw new MalformedCarbonCommandException("Invalid table_blocksize value found: " + - s"$blockSizeStr, only int value from 1 to 2048 " + - s"is supported.") + s"$blockSizeStr, only int value from 1 MB to " + + s"2048 MB is supported.") } + tableProperties.put(CarbonCommonConstants.TABLE_BLOCKSIZE, blockSizeStr) } } + + /** + * This method will parse the configure string from 'XX MB/M' to 'XX' + * + * @param propertyValueString + */ + def parsePropertyValueStringInMB(propertyValueString: String): String = { + var parsedPropertyValueString: String = propertyValueString + if (propertyValueString.trim.toLowerCase.endsWith("mb")) { + parsedPropertyValueString = propertyValueString.trim.toLowerCase + .substring(0, propertyValueString.trim.toLowerCase.lastIndexOf("mb")).trim + } + if (propertyValueString.trim.toLowerCase.endsWith("m")) { + parsedPropertyValueString = propertyValueString.trim.toLowerCase + .substring(0, propertyValueString.trim.toLowerCase.lastIndexOf("m")).trim + } + parsedPropertyValueString + } + } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ce7a1c5b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala index 15075f6..775d48d 100644 
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala @@ -21,6 +21,7 @@ import java.util.regex.{Matcher, Pattern} import scala.collection.JavaConverters._ import scala.collection.mutable.LinkedHashSet +import scala.collection.mutable.Map import scala.language.implicitConversions import scala.util.matching.Regex http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ce7a1c5b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index bbc0ba7..4ade2be 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -24,6 +24,7 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.Map import scala.language.implicitConversions import scala.util.Random @@ -1029,7 +1030,7 @@ case class LoadTable( tableName: String, factPathFromUser: String, dimFilesPath: Seq[DataLoadTableFileMapping], - partionValues: Map[String, String], + partionValues: scala.collection.immutable.Map[String, String], isOverwriteExist: Boolean = false, var inputSqlString: String = null, dataFrame: Option[DataFrame] = None) extends RunnableCommand { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ce7a1c5b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestTableLevelBlockSize.scala ---------------------------------------------------------------------- diff --git 
a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestTableLevelBlockSize.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestTableLevelBlockSize.scala index b5278aa..b7ae638 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestTableLevelBlockSize.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestTableLevelBlockSize.scala @@ -57,13 +57,13 @@ class TestTableLevelBlockSize extends QueryTest with BeforeAndAfterAll{ (ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary Int) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('table_blocksize'='4096') + TBLPROPERTIES('table_blocksize'='4096 MB') """) assert(false) } catch { case e : MalformedCarbonCommandException => { assert(e.getMessage.equals("Invalid table_blocksize value found: 4096, " + - "only int value from 1 to 2048 is supported.")) + "only int value from 1 MB to 2048 MB is supported.")) } } } @@ -76,13 +76,13 @@ class TestTableLevelBlockSize extends QueryTest with BeforeAndAfterAll{ (ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary Int) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('table_blocksize'='10Y4') + TBLPROPERTIES('table_blocksize'='10Y4 MB') """) assert(false) } catch { case e : MalformedCarbonCommandException => { assert(e.getMessage.equals("Invalid table_blocksize value found: 10y4, " + - "only int value from 1 to 2048 is supported.")) + "only int value from 1 MB to 2048 MB is supported.")) } } } @@ -95,7 +95,7 @@ class TestTableLevelBlockSize extends QueryTest with BeforeAndAfterAll{ (ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary Int) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('table_blocksize'='512') + TBLPROPERTIES('table_blocksize'='512 MB') """) 
CarbonProperties.getInstance() http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ce7a1c5b/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala b/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala index 04e47bb..39d1491 100644 --- a/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala +++ b/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala @@ -18,6 +18,8 @@ */ package org.apache.spark.sql +import scala.collection.mutable.Map + import org.apache.spark.sql.common.util.QueryTest import org.apache.spark.sql.execution.command.Field import org.apache.carbondata.core.constants.CarbonCommonConstants