This is an automated email from the ASF dual-hosted git repository. jackylk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push: new 6353024 [CARBONDATA-3645] BadRecords are inserted as NULL when column is of complex data type and BAD_RECORDS_ACTION is IGNORE 6353024 is described below commit 6353024a8a2698801724f781e7e7704eaa997005 Author: Indhumathi27 <indhumathi...@gmail.com> AuthorDate: Tue Dec 31 14:31:44 2019 +0530 [CARBONDATA-3645] BadRecords are inserted as NULL when column is of complex data type and BAD_RECORDS_ACTION is IGNORE Why is this PR needed? When BAD_RECORDS_ACTION is IGNORE, Carbon should skip bad records while loading. But for columns of a complex data type, bad records are inserted as NULL. The badRecordAdded flag is not set from the second bad record onwards, since the message has already been added to columnMessageMap for the first bad record found. What changes were proposed in this PR? Set the badRecordAdded flag whenever a bad record is found. Does this PR introduce any user interface change? No Is any new testcase added? Yes This closes #3551 --- .../src/test/resources/badrecords/complexdata.csv | 11 +++++++++++ .../testsuite/badrecordloger/BadRecordActionTest.scala | 14 ++++++++++++++ .../carbondata/processing/datatypes/PrimitiveDataType.java | 2 +- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/integration/spark-common-test/src/test/resources/badrecords/complexdata.csv b/integration/spark-common-test/src/test/resources/badrecords/complexdata.csv new file mode 100644 index 0000000..9ec85da --- /dev/null +++ b/integration/spark-common-test/src/test/resources/badrecords/complexdata.csv @@ -0,0 +1,11 @@ +arrayColumn,structColumn,arrayStruct +1997-03-20 14:00:09,1$1997-03-20 14:00:09,1#1997-03-20 14:00:09 +1997-03-32 14:00:09,2$1997-03-20 14:00:10,2#1997-03-20 14:00:10 +1997-03-33 14:00:09,3$1997-03-20 14:00:11,3#1997-03-20 14:00:11 +1997-03-31 14:00:09,4$1997-03-20 14:00:12,4#1997-03-20 14:00:12 +1997-03-20 14:00:09,a$1997-03-20 14:00:13,5#1997-03-20 14:00:13 +1997-03-21 14:00:09,b$1997-03-20 
14:00:14,6#1997-03-20 14:00:14 +1997-03-22 14:00:09,5$1997-03-20 14:00:15,5#1997-03-20 14:00:15 +1997-03-23 14:00:09,6$1997-03-20 14:00:16,6#1997-03-20 14:00:16 +1997-03-24 14:00:09,7$1997-03-20 14:00:17,7#1997-03-20 14:00:17 +1997-03-25 14:00:09,8$1997-03-20 14:00:18,8#1997-03-50 14:00:18 diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordActionTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordActionTest.scala index 82337a3..5d2f746 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordActionTest.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordActionTest.scala @@ -255,6 +255,20 @@ class BadRecordActionTest extends QueryTest { Seq(Row(2))) } + test("test bad record IGNORE with complex data types") { + val timeStampFormat = CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT) + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) + sql("drop table if exists complextable") + sql("create table complextable(arrayColumn array<timestamp>, structColumn struct<s1:int,s2:timestamp>,arraystruct array<Struct<as1:int,as2:timestamp>>) stored by 'carbondata'") + sql(s"LOAD DATA local inpath '$resourcesPath/badrecords/complexdata.csv' INTO TABLE complextable OPTIONS('bad_records_action'='ignore', 'DELIMITER'=',', " + + "'QUOTECHAR'= '\"','COMPLEX_DELIMITER_LEVEL_1'='$','COMPLEX_DELIMITER_LEVEL_2'='#')") + checkAnswer(sql("select count(*) from complextable"), Seq(Row(5))) + sql("drop table if exists complextable") + if(null != timeStampFormat) { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, timeStampFormat) + } + } + private def 
currentPath: String = { new File(this.getClass.getResource("/").getPath + "../../") diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java index 23e9322..9376980 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java +++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java @@ -425,8 +425,8 @@ public class PrimitiveDataType implements GenericDataType<Object> { message = CarbonDataProcessorUtil .prepareFailureReason(carbonDimension.getColName(), carbonDimension.getDataType()); logHolder.getColumnMessageMap().put(carbonDimension.getColName(), message); - logHolder.setReason(message); } + logHolder.setReason(message); } @Override