taiyang-li commented on code in PR #7992:
URL: https://github.com/apache/incubator-gluten/pull/7992#discussion_r1859643183


##########
backends-clickhouse/src/test/scala/org/apache/gluten/execution/hive/GlutenClickHouseHiveTableSuite.scala:
##########
@@ -1490,15 +1490,92 @@ class GlutenClickHouseHiveTableSuite
     val scan = df.queryExecution.executedPlan.collect {
       case scan: FileSourceScanExecTransformer => scan
     }.head
-
-    val schema = scan.schema
-    assert(schema.size == 1)
-    val fieldType = schema.fields.head.dataType.asInstanceOf[StructType]
+    val fieldType = scan.schema.fields.head.dataType.asInstanceOf[StructType]
     assert(fieldType.size == 1)
-
     spark.sql("drop table if exists aj")
   }
 
+  test("Nested column pruning for Project(Filter(Generate)) on generator") {
+    def assertFieldSizeAfterPruning(df: DataFrame, expectSize: Int): Unit = {
+      val scan = df.queryExecution.executedPlan.collect {
+        case scan: FileSourceScanExecTransformer => scan
+      }.head
+
+      val fieldType =
+        scan.schema.fields.head.dataType
+          .asInstanceOf[ArrayType]
+          .elementType
+          .asInstanceOf[StructType]
+      assert(fieldType.size == expectSize)
+    }
+
+    spark.sql("drop table if exists ajog")
+    spark.sql(
+      """
+        |CREATE TABLE if not exists ajog (
+        |  country STRING,
+        |  events ARRAY<STRUCT<time:BIGINT, lng:BIGINT, lat:BIGINT, net:STRING,
+        |     log_extra:MAP<STRING, STRING>, event_id:STRING, event_info:MAP<STRING, STRING>>>
+        |)
+        |USING orc
+      """.stripMargin)
+
+    spark.sql("""
+                |INSERT INTO ajog VALUES
+                |  ('USA', array(named_struct('time', 1622547800, 'lng', -122, 'lat', 37, 'net',
+                |    'wifi', 'log_extra', map('key1', 'value1'), 'event_id', 'event1',
+                |    'event_info', map('tab_type', '5', 'action', '13')))),
+                |  ('Canada', array(named_struct('time', 1622547801, 'lng', -79, 'lat', 43, 'net',
+                |    '4g', 'log_extra', map('key2', 'value2'), 'event_id', 'event2',
+                |    'event_info', map('tab_type', '4', 'action', '12'))))
+       """.stripMargin)
+
+    // Test nested column pruning on generator with single field extracted
+    val df1 =
+      spark.sql("""
+                  |select
+                  |case when event.event_info['tab_type'] in (5) then '1' else '0' end as entrance
+                  |from ajog
+                  |lateral view explode(events)  as event
+                  |where  event.event_info['action'] in (13)
+      """.stripMargin)
+    assertFieldSizeAfterPruning(df1, 1)
+
+    // Test nested column pruning on generator with multiple field extracted,
+    // which resolves SPARK-34956
+    val df2 =
+      spark.sql("""
+                  |select event.event_id,
+                  |case when event.event_info['tab_type'] in (5) then '1' else '0' end as entrance
+                  |from ajog
+                  |lateral view explode(events)  as event
+                  |where  event.event_info['action'] in (13)
+      """.stripMargin)
+    assertFieldSizeAfterPruning(df2, 2)
+
+    // Test nested column pruning with two adjacent generate operator
+    val df3 =
+      spark.sql("""
+                  |SELECT
+                  |abflag,
+                  |event.event_info,
+                  |event.log_extra
+                  |FROM
+                  |ajog
+                  |LATERAL VIEW EXPLODE(events) AS event
+                  |LATERAL VIEW EXPLODE(split(event.log_extra['abflags_v3'], ',')) AS abflag
+                  |WHERE
+                  |event.event_id = 'xx'
+                  |AND event.event_info['dispatch_id'] IS NOT NULL
+                  |AND event.event_info['dispatch_id'] != ''
+                  |AND event.log_extra['scene'] = 'xxx'
+                  |LIMIT 100;
+      """.stripMargin)
+    assertFieldSizeAfterPruning(df3, 3)

Review Comment:
   fixed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to