[CARBONDATA-2610] Fix datamap creation failure on a table whose loaded data contains null values in a string datatype column
Problem: Datamap creation having null values already loaded in string datatype of table fails. Solution: Check for null before converting data to the string. This closes #2376 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e7fed361 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e7fed361 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e7fed361 Branch: refs/heads/carbonstore Commit: e7fed361b93d7986392c469009f0f6633d71def5 Parents: ece0672 Author: Jatin <jatin.de...@knoldus.in> Authored: Thu Jun 14 22:56:09 2018 +0530 Committer: Jacky Li <jacky.li...@qq.com> Committed: Tue Jun 19 00:23:01 2018 +0800 ---------------------------------------------------------------------- .../lucene/LuceneFineGrainDataMapSuite.scala | 31 ++++++++++++++++++++ .../datamap/IndexDataMapRebuildRDD.scala | 2 +- 2 files changed, 32 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/e7fed361/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala index 6530ec0..6d774a2 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala @@ -829,6 +829,37 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll { sql("select * from table_stop where text_match('suggestion:*is*')").collect().length == 1) } + test("test 
lucene data map on null values") { + sql("DROP TABLE IF EXISTS datamap_test4") + sql("DROP TABLE IF EXISTS datamap_copy") + sql( + """ + | CREATE TABLE datamap_test4(id INT, name STRING, city STRING, age INT) + | STORED BY 'carbondata' + | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='LOCAL_SORT', 'autorefreshdatamap' = 'false') + """.stripMargin) + sql( + """ + | CREATE TABLE datamap_copy(id INT, name STRING, city STRING, age INT) + | STORED BY 'carbondata' + | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='LOCAL_SORT', 'autorefreshdatamap' = 'false') + """.stripMargin) + sql("insert into datamap_test4 select 1,'name','city',20") + sql("insert into datamap_test4 select 2,'name1','city1',20") + sql("insert into datamap_test4 select 25,cast(null as string),'city2',NULL") + sql("insert into datamap_copy select * from datamap_test4") + sql( + s""" + | CREATE DATAMAP dm4 ON TABLE datamap_test4 + | USING 'lucene' + | DMProperties('INDEX_COLUMNS'='name , city') + """.stripMargin) + checkAnswer(sql("SELECT * FROM datamap_test4 WHERE TEXT_MATCH('name:n*')"), + sql(s"select * from datamap_copy where name like '%n%'")) + sql("drop table datamap_test4") + sql("drop table datamap_copy") + } + override protected def afterAll(): Unit = { LuceneFineGrainDataMapSuite.deleteFile(file2) sql("DROP TABLE IF EXISTS normal_test") http://git-wip-us.apache.org/repos/asf/carbondata/blob/e7fed361/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala b/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala index f3f2650..cde6201 100644 --- a/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala +++ 
b/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala @@ -124,7 +124,7 @@ class OriginalReadSupport(dataTypes: Array[DataType]) extends CarbonReadSupport[ override def readRow(data: Array[Object]): Array[Object] = { dataTypes.zipWithIndex.foreach { case (dataType, i) => - if (dataType == DataTypes.STRING) { + if (dataType == DataTypes.STRING && data(i) != null) { data(i) = data(i).toString } }