This is an automated email from the ASF dual-hosted git repository. ajantha pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push: new aab943a [CARBONDATA-3846] Data load issue for boolean column configured as BUCKET_COLUMNS aab943a is described below commit aab943ae53b882861672316be6581fd2a8a833bc Author: akkio-97 <akshay.nuth...@gmail.com> AuthorDate: Thu Jul 9 01:52:35 2020 +0530 [CARBONDATA-3846] Data load issue for boolean column configured as BUCKET_COLUMNS Why is this PR needed? During data load into a table with a boolean bucket column, the boolean value was retrieved using the wrong index (always index 0 instead of the bucket column's index). The issue occurs only when the table has more than one column (including the boolean column). Because of the wrong index, another column's data was retrieved instead of the boolean value, which caused a ClassCastException and made the data load fail. What changes were proposed in this PR? Boolean values are now retrieved using the correct index. Does this PR introduce any user interface change? No Is any new testcase added? Yes This closes #3831 --- .../bucketing/TableBucketingTestCase.scala | 22 ++++++++++++++++++++++ .../impl/SparkHashExpressionPartitionerImpl.java | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala b/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala index 0bfa93e..5d7dbda 100644 --- a/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala @@ -1040,6 +1040,28 @@ class TableBucketingTestCase extends QueryTest with BeforeAndAfterAll { assert(shuffleExists2, "shuffle should exist when some bucket columns not exist in filter") } + test("test load data with boolean type as bucket column") { + sql("drop table if exists boolean_table") + sql( + s""" + | CREATE TABLE boolean_table( + | booleanField BOOLEAN, + | stringField STRING, + | intField INT + | ) + | STORED AS 
carbondata + | TBLPROPERTIES('BUCKET_NUMBER'='1', 'BUCKET_COLUMNS'='booleanField') + """.stripMargin) + + sql( + s""" + | LOAD DATA LOCAL INPATH '$resourcesPath/bool/supportBooleanWithFileHeader.csv' + | INTO TABLE boolean_table + """.stripMargin) + + checkAnswer(sql("select count(*) from boolean_table"), Row(10)) + } + override def afterAll { CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/partition/impl/SparkHashExpressionPartitionerImpl.java b/processing/src/main/java/org/apache/carbondata/processing/loading/partition/impl/SparkHashExpressionPartitionerImpl.java index 81501e6..3853bf2 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/partition/impl/SparkHashExpressionPartitionerImpl.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/partition/impl/SparkHashExpressionPartitionerImpl.java @@ -93,7 +93,7 @@ public class SparkHashExpressionPartitionerImpl implements Partitioner<CarbonRow } int intValue = 0; if (value[index] instanceof Boolean) { - boolean boolValue = (boolean) value[intValue]; + boolean boolValue = (boolean) value[index]; intValue = boolValue ? 1 : 0; } else if (value[index] instanceof Float) { intValue = Float.floatToIntBits((float) value[index]);