This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new fd0ff22 [CARBONDATA-4153] Fix DoNot Push down not equal to filter
with Cast on SI
fd0ff22 is described below
commit fd0ff22a9d5834d34b39cfd7a781e7332802e4ad
Author: Indhumathi27 <[email protected]>
AuthorDate: Tue Mar 16 15:04:25 2021 +0530
[CARBONDATA-4153] Fix DoNot Push down not equal to filter with Cast on SI
Why is this PR needed?
A NOT EQUAL TO filter on an SI index column should not be pushed down to the
SI table.
Currently, "where x != '2'" is not pushed down to SI, but "where x != 2" is
pushed down to SI.
This is because in "x != 2" the column is wrapped in a CAST expression, i.e.
NOT(EQUAL TO(cast(x as int), 2)), which the existing check does not match.
What changes were proposed in this PR?
Handle the CAST case when checking whether a filter must not be pushed down to SI.
This closes #4108
---
.../secondaryindex/TestSIWithSecondaryIndex.scala | 16 ++++++++++++++++
.../optimizer/CarbonSecondaryIndexOptimizer.scala | 2 ++
2 files changed, 18 insertions(+)
diff --git
a/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondaryIndex.scala
b/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondaryIndex.scala
index e0dc1ce..a5c1d34 100644
---
a/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondaryIndex.scala
+++
b/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondaryIndex.scala
@@ -653,6 +653,22 @@ class TestSIWithSecondaryIndex extends QueryTest with
BeforeAndAfterAll {
sql("drop table if exists maintable")
}
+ test("test SI with donot push down not equal to filter with Cast") {
+ sql("drop table if exists maintable")
+ sql("create table maintable (a string,b string,c string) STORED AS carbondata ")
+ sql("insert into maintable values ('aa', '3', 'cc')")
+ sql("create index indextable on table maintable(b) AS 'carbondata'")
+ // b is a string column, so the numeric literal in b!=2 puts a Cast around b
+ // in the NOT EQUAL TO filter, while b!='2' compares the column directly.
+ val df1 = sql("select * from maintable where b!=2")
+ val df2 = sql("select * from maintable where b!='2'")
+ // Neither query may push its filter down to the SI table. Each plan is
+ // checked separately so one pushed-down plan cannot be masked by the other.
+ assert(!isFilterPushedDownToSI(df1.queryExecution.sparkPlan))
+ assert(!isFilterPushedDownToSI(df2.queryExecution.sparkPlan))
+ sql("drop table if exists maintable")
+ }
+
def createAndInsertDataIntoTable(): Unit = {
sql("drop table if exists maintable2")
sql("create table maintable2 (a string,b string,c int) STORED AS
carbondata ")
diff --git
a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
index df56b24..23d17d2 100644
---
a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
+++
b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
@@ -513,6 +513,8 @@ class CarbonSecondaryIndexOptimizer(sparkSession:
SparkSession) {
val doNotPushToSI = condition match {
case IsNotNull(child: AttributeReference) => !pushDownNotNullFilter
case Not(EqualTo(left: AttributeReference, right: Literal)) => true
+ case Not(EqualTo(left: Cast, right: Literal))
+ if left.child.isInstanceOf[AttributeReference] => true
case Not(Like(left: AttributeReference, right: Literal)) => true
case Not(In(left: AttributeReference, right: Seq[Expression])) => true
case Not(Contains(left: AttributeReference, right: Literal)) => true