This is an automated email from the ASF dual-hosted git repository.
ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 690efc4 [CARBONDATA-3942] Fix type cast when loading data into
partitioned table
690efc4 is described below
commit 690efc4b067da4e5538425f72c3eca5550627aa8
Author: IceMimosa <[email protected]>
AuthorDate: Thu Aug 6 16:32:33 2020 +0800
[CARBONDATA-3942] Fix type cast when loading data into partitioned table
Why is this PR needed?
When loading Int type data into a carbondata column of double type, the values become corrupted,
like this:
+--------+----+
|cnt     |time|
+--------+----+
|4.9E-323|2020|
|1.0E-322|2020|
+--------+----+
The original cnt values are 10 and 20.
What changes were proposed in this PR?
Handle type casting when inserting into a partitioned table.
Does this PR introduce any user interface change?
No
Is any new testcase added?
Yes
This closes #3884
---
.../org/apache/spark/sql/hive/CarbonAnalysisRules.scala | 12 +++++-------
.../testsuite/allqueries/InsertIntoCarbonTableTestCase.scala | 11 +++++++++++
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git
a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonAnalysisRules.scala
b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonAnalysisRules.scala
index 508060e..6ff2573 100644
---
a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonAnalysisRules.scala
+++
b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonAnalysisRules.scala
@@ -254,9 +254,10 @@ case class CarbonPreInsertionCasts(sparkSession:
SparkSession) extends Rule[Logi
}
// In spark, PreprocessTableInsertion rule has below cast logic.
// It was missed in carbon when implemented insert into rules.
- var newChildOutput = if (child.output.size ==
carbonDSRelation.carbonRelation.output.size) {
- val expectedOutput = carbonDSRelation.carbonRelation.output
- child.output.zip(expectedOutput).map {
+ val actualOutput = child.output
+ val expectedOutput = carbonDSRelation.carbonRelation.output
+ var newChildOutput = child.output.zip(expectedOutput)
+ .map {
case (actual, expected) =>
if (expected.dataType.sameType(actual.dataType) &&
expected.name == actual.name &&
@@ -269,10 +270,7 @@ case class CarbonPreInsertionCasts(sparkSession:
SparkSession) extends Rule[Logi
Alias(Cast(actual, expected.dataType), expected.name)(
explicitMetadata = Option(expected.metadata))
}
- }
- } else {
- child.output
- }
+ } ++ actualOutput.takeRight(actualOutput.size - expectedOutput.size)
if (newChildOutput.size >= carbonDSRelation.carbonRelation.output.size ||
carbonDSRelation.carbonTable.isHivePartitionTable) {
newChildOutput = newChildOutput.zipWithIndex.map { columnWithIndex =>
diff --git
a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala
b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala
index 89a260c..9588d27 100644
---
a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala
+++
b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala
@@ -391,6 +391,17 @@ class InsertIntoCarbonTableTestCase extends QueryTest with
BeforeAndAfterAll {
assert(e.getMessage.contains("number of columns are different"))
}
+ test("test insert into partitioned table with int type to double type") {
+ sql("DROP TABLE IF EXISTS table1")
+ sql("CREATE TABLE table1 (cnt double) partitioned by (pt string) stored as
carbondata")
+ sql("insert overwrite table table1 partition(pt='2020') select 10")
+ checkAnswer(
+ sql("select * from table1"),
+ sql("select 10.0, '2020'")
+ )
+ sql(s"DROP TABLE IF EXISTS table1")
+ }
+
override def afterAll {
sql("drop table if exists load")
sql("drop table if exists inser")