This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 3b411bb  [CARBONDATA-4172] Select query having parent and child struct 
column in projection returns incorrect results
3b411bb is described below

commit 3b411bbea81ed9c70e88a4933dfff2742d686b6e
Author: Indhumathi27 <[email protected]>
AuthorDate: Thu Apr 22 13:50:39 2021 +0530

    [CARBONDATA-4172] Select query having parent and child struct column in 
projection returns incorrect results
    
    Why is this PR needed?
    The code refactoring done in PR-3574 missed a scenario.
    Currently, if a select query has both a parent struct column and one of its
    child columns in the projection, only the child column is pushed down to
    carbon for filling the result. For the other columns in the parent struct,
    the output is null.
    
    What changes were proposed in this PR?
    If the parent struct column is also present in the projection, then push
    down only the parent column to carbon.
    
    This closes #4123
---
 .../sql/execution/strategy/PushDownHelper.scala    | 26 ++++++++---
 .../complexType/TestComplexDataType.scala          | 51 +++-------------------
 2 files changed, 26 insertions(+), 51 deletions(-)

diff --git 
a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/PushDownHelper.scala
 
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/PushDownHelper.scala
index e532cd4..c8a8501 100644
--- 
a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/PushDownHelper.scala
+++ 
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/PushDownHelper.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution.strategy
 
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.{ArrayBuffer, ListBuffer}
 
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, 
GetArrayItem, GetMapValue, GetStructField, Literal, NamedExpression}
 import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType}
@@ -35,15 +35,24 @@ object PushDownHelper {
     // parent/child field and push down the corresponding project column. In 
case of Array, Map,
     // ArrayOfStruct, StructOfArray, MapOfStruct or StructOfMap, push down 
parent column
     val output = ArrayBuffer[String]()
-    projects.foreach(PushDownHelper.collectColumns(_, output))
+    var parentColumns = ArrayBuffer[String]()
+    projects.foreach(PushDownHelper.collectColumns(_, output, parentColumns))
+    parentColumns = parentColumns.distinct
     if (output.isEmpty) {
       requiredColumns.foreach(projection.addColumn)
     } else {
       requiredColumns.map(_.toLowerCase).foreach { requiredColumn =>
         val childOption = output.filter(_.startsWith(requiredColumn + "."))
-        childOption.isEmpty match {
-          case true => projection.addColumn(requiredColumn)
-          case false => childOption.foreach(projection.addColumn)
+        if (childOption.isEmpty) {
+          projection.addColumn(requiredColumn)
+        } else {
+          // If projection contains both parent and its child, then push down 
parent column
+          // itself instead of its child column
+          if (parentColumns.contains(requiredColumn)) {
+            projection.addColumn(requiredColumn)
+          } else {
+            childOption.foreach(projection.addColumn)
+          }
         }
       }
     }
@@ -51,7 +60,8 @@ object PushDownHelper {
 
   private def collectColumns(
       exp: NamedExpression,
-      pushDownColumns: ArrayBuffer[String]
+      pushDownColumns: ArrayBuffer[String],
+      parentColumns: ArrayBuffer[String]
   ): Unit = {
     exp transform {
       case struct: GetStructField =>
@@ -69,7 +79,9 @@ object PushDownHelper {
         pushDownColumns += getParentName(map)
         Literal.TrueLiteral
       case attr: AttributeReference =>
-        pushDownColumns += attr.name.toLowerCase
+        val attrName = attr.name.toLowerCase
+        pushDownColumns += attrName
+        parentColumns += attrName
         Literal.TrueLiteral
     }
   }
diff --git 
a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
 
b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
index 88f5613..6c457dd 100644
--- 
a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
+++ 
b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
@@ -611,51 +611,14 @@ class TestComplexDataType extends QueryTest with 
BeforeAndAfterAll {
     checkAnswer(sql("select a.d,a.f from table1"),
       Seq(Row(2, Row(3, "mno", 4)), Row(2, Row(3, "mno", 4)), Row(2, Row(3, 
"mno", 4))))
     checkAnswer(sql("select a.j from table1"), Seq(Row(5), Row(5), Row(5)))
-    checkAnswer(sql("select * from table1"),
+    checkAnswer(sql("select a.b,a from table1"),
       Seq(Row(1, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5)),
-        Row(2, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5)),
-        Row(3, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5))))
-    checkAnswer(sql("select *,a from table1"),
-      Seq(Row(1,
-        Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5),
-        Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5)),
-        Row(2,
-          Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5),
-          Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5)),
-        Row(3,
-          Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5),
-          Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5))))
-  }
-
-  test("test Projection PushDown for StructofStruct for Dictionary Include ") {
-    sql("DROP TABLE IF EXISTS table1")
-    sql(
-      "create table table1 (roll int,a 
struct<b:int,c:string,d:int,e:string,f:struct<g:int," +
-      "h:string,i:int>,j:int>) STORED AS carbondata ")
-    sql("insert into table1 values(1,named_struct('b', 1, 'c', 'abc', 'd', 2, 
'e', " +
-        "'efg', 'f', named_struct('g', 3, 'h', 'mno', 'i', 4), 'j', 5))")
-    sql("insert into table1 values(2,named_struct('b', 1, 'c', 'abc', 'd', 2, 
'e', " +
-        "'efg', 'f', named_struct('g', 3, 'h', 'mno', 'i', 4), 'j', 5))")
-    sql("insert into table1 values(3,named_struct('b', 1, 'c', 'abc', 'd', 2, 
'e', " +
-        "'efg', 'f', named_struct('g', 3, 'h', 'mno', 'i', 4), 'j', 5))")
-    checkAnswer(sql("select a.b from table1"), Seq(Row(1), Row(1), Row(1)))
-    checkAnswer(sql("select a.c from table1"), Seq(Row("abc"), Row("abc"), 
Row("abc")))
-    checkAnswer(sql("select a.d from table1"), Seq(Row(2), Row(2), Row(2)))
-    checkAnswer(sql("select a.e from table1"), Seq(Row("efg"), Row("efg"), 
Row("efg")))
-    checkAnswer(sql("select a.f from table1"),
-      Seq(Row(Row(3, "mno", 4)), Row(Row(3, "mno", 4)), Row(Row(3, "mno", 4))))
-    checkAnswer(sql("select a.f.g  from table1"), Seq(Row(3), Row(3), Row(3)))
-    checkAnswer(sql("select a.f.h  from table1"), Seq(Row("mno"), Row("mno"), 
Row("mno")))
-    checkAnswer(sql("select a.f.i  from table1"), Seq(Row(4), Row(4), Row(4)))
-    checkAnswer(sql("select a.f.g,a.f.h,a.f.i  from table1"),
-      Seq(Row(3, "mno", 4), Row(3, "mno", 4), Row(3, "mno", 4)))
-    checkAnswer(sql("select a.b,a.f from table1"),
-      Seq(Row(1, Row(3, "mno", 4)), Row(1, Row(3, "mno", 4)), Row(1, Row(3, 
"mno", 4))))
-    checkAnswer(sql("select a.c,a.f from table1"),
-      Seq(Row("abc", Row(3, "mno", 4)), Row("abc", Row(3, "mno", 4)), 
Row("abc", Row(3, "mno", 4))))
-    checkAnswer(sql("select a.d,a.f from table1"),
-      Seq(Row(2, Row(3, "mno", 4)), Row(2, Row(3, "mno", 4)), Row(2, Row(3, 
"mno", 4))))
-    checkAnswer(sql("select a.j from table1"), Seq(Row(5), Row(5), Row(5)))
+        Row(1, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5)),
+        Row(1, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5))))
+    checkAnswer(sql("select a.b,a,a from table1"),
+      Seq(Row(1, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5), Row(1, "abc", 
2, "efg", Row(3, "mno", 4), 5)),
+          Row(1, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5), Row(1, "abc", 
2, "efg", Row(3, "mno", 4), 5)),
+          Row(1, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5), Row(1, "abc", 
2, "efg", Row(3, "mno", 4), 5))))
     checkAnswer(sql("select * from table1"),
       Seq(Row(1, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5)),
         Row(2, Row(1, "abc", 2, "efg", Row(3, "mno", 4), 5)),

Reply via email to