This is an automated email from the ASF dual-hosted git repository.

akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new ebe4057  [CARBONDATA-3452] Fix select query failure when substring on 
dictionary column with join
ebe4057 is described below

commit ebe405747474ceac4b0f022ce8b8df45947cb4ae
Author: ajantha-bhat <[email protected]>
AuthorDate: Tue Jun 25 15:48:06 2019 +0530

    [CARBONDATA-3452] Fix select query failure when substring on dictionary 
column with join
    
    Problem: select query fails when substring is applied on a dictionary column with a join.
    
    Cause: when dictionary include is present, the data type is updated to int from 
string in the plan attribute.
    So substring was unresolved on the int column. The join operation tries to reference 
this attribute, which is unresolved.
    
    Solution: skip updating the data type if a dictionary column is included in the plan
    
    This closes #3306
---
 .../spark/sql/optimizer/CarbonLateDecodeRule.scala | 20 ++++++--
 .../carbondata/query/SubQueryJoinTestSuite.scala   | 59 ++++++++++++++++++++++
 2 files changed, 76 insertions(+), 3 deletions(-)

diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala
index 3986839..93773fc 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala
@@ -698,11 +698,25 @@ class CarbonLateDecodeRule extends Rule[LogicalPlan] with 
PredicateHelper {
         u
       case p: Project if relations.nonEmpty =>
         val prExps = p.projectList.map { prExp =>
+          var needChangeDatatype = true
           prExp.transform {
-            case attr: AttributeReference =>
-              updateDataType(attr, attrMap, allAttrsNotDecode, aliasMap)
+            case attr: AttributeReference => attr
+            case a@Alias(attr: AttributeReference, _) => a
+            case others =>
+              // datatype need to change for dictionary columns if only alias
+              // or attribute ref present.
+              // If anything else present, no need to change data type.
+              needChangeDatatype = false
+              others
+          }
+          if (needChangeDatatype) {
+            prExp.transform {
+              case attr: AttributeReference =>
+                updateDataType(attr, attrMap, allAttrsNotDecode, aliasMap)
+            }
           }
-        }.asInstanceOf[Seq[NamedExpression]]
+          prExp
+        }
         Project(prExps, p.child)
       case wd: Window if relations.nonEmpty =>
         val prExps = wd.output.map { prExp =>
diff --git 
a/integration/spark2/src/test/scala/org/apache/spark/carbondata/query/SubQueryJoinTestSuite.scala
 
b/integration/spark2/src/test/scala/org/apache/spark/carbondata/query/SubQueryJoinTestSuite.scala
new file mode 100644
index 0000000..635445a
--- /dev/null
+++ 
b/integration/spark2/src/test/scala/org/apache/spark/carbondata/query/SubQueryJoinTestSuite.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.carbondata.query
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.Spark2QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+class SubQueryJoinTestSuite extends Spark2QueryTest with BeforeAndAfterAll {
+
+  test("test to check if 2nd level subquery gives correct result") {
+    sql("drop table if exists t1")
+    sql("drop table if exists t2")
+
+    sql("create table t1 (s1 string) stored by 'carbondata' 
tblproperties('dictionary_include'='s1')")
+    sql("insert into t1 select 'abcd' ")
+    sql("insert into t1 select 'efgh' ")
+    sql("insert into t1 select 'ijkl' ")
+    sql("insert into t1 select 'MNOP' ")
+
+    sql("create table t2 (t2 string) stored by 'carbondata'")
+    sql("insert into t2 select 'ef' ")
+    sql("insert into t2 select 'mnop' ")
+    sql("insert into t2 select '4' ")
+
+    // substring
+    checkAnswer(sql(
+      "select a.ch from (select substring(s1,1,2) as ch from t1) a  join t2 h 
on (a.ch = h.t2)"),
+      Seq(Row("ef")))
+
+    // lower
+    checkAnswer(sql(
+      "select a.ch from (select lower(s1) as ch from t1) a  join t2 h on (a.ch 
= h.t2)"),
+      Seq(Row("mnop")))
+
+    // length
+    checkAnswer(sql(
+      "select a.ch from (select length(s1) as ch from t1) a  join t2 h on 
(a.ch = h.t2)"),
+      Seq(Row(4), Row(4), Row(4), Row(4)))
+
+    sql("drop table t1")
+    sql("drop table t2")
+  }
+}

Reply via email to