This is an automated email from the ASF dual-hosted git repository.
indhumuthumurugesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 9944936 [CARBONDATA-4243] Fixed si with column meta cache on same
column
9944936 is described below
commit 9944936d684f241c5e81ca809507f97d1ef82f26
Author: nihal0107 <[email protected]>
AuthorDate: Mon Sep 27 18:36:52 2021 +0530
[CARBONDATA-4243] Fixed si with column meta cache on same column
Why is this PR needed?
Currently, the select query fails when the table contains SI and
column_meta_cache
on the same columns with the to_date() UDF. This happens because
pushdownfilters
is null in CarbonDataSourceScanHelper, which causes a null pointer
exception.
What changes were proposed in this PR?
Instead of passing a null value for pushdownfilters in
CarbonDataSourceScan.doCanonicalize, Seq.empty is now passed.
This closes #4225
---
.../secondaryindex/TestSIWithSecondaryIndex.scala | 36 +++++++++++++++++++++-
.../execution/strategy/CarbonDataSourceScan.scala | 2 +-
2 files changed, 36 insertions(+), 2 deletions(-)
diff --git
a/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondaryIndex.scala
b/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondaryIndex.scala
index 6d18e6c..03feffd 100644
---
a/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondaryIndex.scala
+++
b/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondaryIndex.scala
@@ -16,6 +16,8 @@
*/
package org.apache.carbondata.spark.testsuite.secondaryindex
+import java.sql.Date
+
import mockit.{Mock, MockUp}
import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.{CarbonEnv, Row}
@@ -29,7 +31,6 @@ import org.apache.carbondata.core.locks.AbstractCarbonLock
import org.apache.carbondata.core.statusmanager.{LoadMetadataDetails,
SegmentStatus, SegmentStatusManager}
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.core.util.path.CarbonTablePath
-import org.apache.carbondata.spark.exception.ProcessMetaDataException
import
org.apache.carbondata.spark.testsuite.secondaryindex.TestSecondaryIndexUtils.isFilterPushedDownToSI
class TestSIWithSecondaryIndex extends QueryTest with BeforeAndAfterAll {
@@ -114,6 +115,39 @@ class TestSIWithSecondaryIndex extends QueryTest with
BeforeAndAfterAll {
.contains("Table property global_sort_partitions : -1 is invalid"))
}
+ test ("test SI with column meta cache") {
+ val timeStampFormat = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT)
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+ .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "dd-MM-yyyy")
+ try {
+ sql("drop table if exists uniqdataTable")
+ sql("CREATE table uniqdataTable (empno int, empname String, designation
String, " +
+ "doj Timestamp, workgroupcategory int, workgroupcategoryname String,
deptno int, " +
+ "deptname String, projectcode int, projectjoindate Timestamp,
projectenddate Timestamp," +
+ " attendance int, utilization int,salary int) STORED AS carbondata " +
+ "TBLPROPERTIES('COLUMN_META_CACHE'='projectjoindate')")
+ sql("create index uniqdataindex1 on table uniqdataTable
(projectjoindate) AS 'carbondata'")
+ sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/data.csv' INTO TABLE
uniqdataTable " +
+ "OPTIONS('DELIMITER'=',')")
+
+ checkAnswer(sql("select
max(to_date(projectjoindate)),min(to_date(projectjoindate))," +
+ "count(to_date(projectjoindate)) from uniqdataTable where
to_date(projectjoindate)=" +
+ "'2007-02-17' or to_date(projectjoindate)='2011-01-29' union select " +
+ "max(to_date(projectjoindate)), min(to_date(projectjoindate))," +
+ "count(to_date(projectjoindate)) from uniqdataTable where
to_date(projectjoindate)=" +
+ "'2007-02-17' or to_date(projectjoindate)='2011-01-29'"), Seq(Row(Date
+ .valueOf("2011-01-29"), Date.valueOf("2007-02-17"), 2)))
+ sql("drop table if exists uniqdataTable")
+ } finally {
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
timeStampFormat)
+ .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
+ CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
+ }
+ }
+
test("test create secondary index global sort before insert") {
sql("create table table11 (name string, id string, country string) stored
as carbondata")
sql("create index table11_index on table table11(id, country) as
'carbondata' properties" +
diff --git
a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonDataSourceScan.scala
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonDataSourceScan.scala
index fd1336f..2e1bb96 100644
---
a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonDataSourceScan.scala
+++
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonDataSourceScan.scala
@@ -144,7 +144,7 @@ case class CarbonDataSourceScan(
QueryPlan.normalizePredicates(dataFilters, output),
null,
null,
- null,
+ Seq.empty,
directScanSupport,
extraRDD,
tableIdentifier,