This is an automated email from the ASF dual-hosted git repository.
jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 6353024 [CARBONDATA-3645] BadRecords are inserted as NULL when column
is of complex data type and BAD_RECORDS_ACTION is IGNORE
6353024 is described below
commit 6353024a8a2698801724f781e7e7704eaa997005
Author: Indhumathi27 <[email protected]>
AuthorDate: Tue Dec 31 14:31:44 2019 +0530
[CARBONDATA-3645] BadRecords are inserted as NULL when column is of complex
data type and BAD_RECORDS_ACTION is IGNORE
Why is this PR needed?
When BAD_RECORDS_ACTION is IGNORE, Carbon should skip bad records while
loading. However, for columns of complex data type, bad records are
inserted as NULL. This happens because the badRecordAdded flag is not set
from the second bad record onwards, since the message was already added to
columnMessageMap when the first bad record was found.
What changes were proposed in this PR?
Set the badRecordAdded flag whenever a bad record is found, not only for the first one.
Does this PR introduce any user interface change?
No
Is any new testcase added?
Yes
This closes #3551
---
.../src/test/resources/badrecords/complexdata.csv | 11 +++++++++++
.../testsuite/badrecordloger/BadRecordActionTest.scala | 14 ++++++++++++++
.../carbondata/processing/datatypes/PrimitiveDataType.java | 2 +-
3 files changed, 26 insertions(+), 1 deletion(-)
diff --git
a/integration/spark-common-test/src/test/resources/badrecords/complexdata.csv
b/integration/spark-common-test/src/test/resources/badrecords/complexdata.csv
new file mode 100644
index 0000000..9ec85da
--- /dev/null
+++
b/integration/spark-common-test/src/test/resources/badrecords/complexdata.csv
@@ -0,0 +1,11 @@
+arrayColumn,structColumn,arrayStruct
+1997-03-20 14:00:09,1$1997-03-20 14:00:09,1#1997-03-20 14:00:09
+1997-03-32 14:00:09,2$1997-03-20 14:00:10,2#1997-03-20 14:00:10
+1997-03-33 14:00:09,3$1997-03-20 14:00:11,3#1997-03-20 14:00:11
+1997-03-31 14:00:09,4$1997-03-20 14:00:12,4#1997-03-20 14:00:12
+1997-03-20 14:00:09,a$1997-03-20 14:00:13,5#1997-03-20 14:00:13
+1997-03-21 14:00:09,b$1997-03-20 14:00:14,6#1997-03-20 14:00:14
+1997-03-22 14:00:09,5$1997-03-20 14:00:15,5#1997-03-20 14:00:15
+1997-03-23 14:00:09,6$1997-03-20 14:00:16,6#1997-03-20 14:00:16
+1997-03-24 14:00:09,7$1997-03-20 14:00:17,7#1997-03-20 14:00:17
+1997-03-25 14:00:09,8$1997-03-20 14:00:18,8#1997-03-50 14:00:18
diff --git
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordActionTest.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordActionTest.scala
index 82337a3..5d2f746 100644
---
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordActionTest.scala
+++
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordActionTest.scala
@@ -255,6 +255,20 @@ class BadRecordActionTest extends QueryTest {
Seq(Row(2)))
}
+ test("test bad record IGNORE with complex data types") {
+ val timeStampFormat =
CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT)
+
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+ sql("drop table if exists complextable")
+ sql("create table complextable(arrayColumn array<timestamp>, structColumn
struct<s1:int,s2:timestamp>,arraystruct array<Struct<as1:int,as2:timestamp>>)
stored by 'carbondata'")
+ sql(s"LOAD DATA local inpath '$resourcesPath/badrecords/complexdata.csv'
INTO TABLE complextable OPTIONS('bad_records_action'='ignore', 'DELIMITER'=',',
" +
+ "'QUOTECHAR'=
'\"','COMPLEX_DELIMITER_LEVEL_1'='$','COMPLEX_DELIMITER_LEVEL_2'='#')")
+ checkAnswer(sql("select count(*) from complextable"), Seq(Row(5)))
+ sql("drop table if exists complextable")
+ if(null != timeStampFormat) {
+
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
timeStampFormat)
+ }
+ }
+
private def currentPath: String = {
new File(this.getClass.getResource("/").getPath + "../../")
diff --git
a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
index 23e9322..9376980 100644
---
a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
+++
b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
@@ -425,8 +425,8 @@ public class PrimitiveDataType implements
GenericDataType<Object> {
message = CarbonDataProcessorUtil
.prepareFailureReason(carbonDimension.getColName(),
carbonDimension.getDataType());
logHolder.getColumnMessageMap().put(carbonDimension.getColName(),
message);
- logHolder.setReason(message);
}
+ logHolder.setReason(message);
}
@Override