[hudi] branch master updated: [HUDI-6941] Add unit test for HUDI-6941 for stages number check (#9866)

danny0405 Mon, 16 Oct 2023 21:37:45 -0700

This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git



The following commit(s) were added to refs/heads/master by this push:
     new d7d32154464 [HUDI-6941] Add unit test for HUDI-6941 for stages number 
check (#9866)
d7d32154464 is described below

commit d7d321544644b9e599004beddd9a3c202bc05e7d
Author: xuzifu666 <[email protected]>
AuthorDate: Tue Oct 17 12:37:29 2023 +0800

    [HUDI-6941] Add unit test for HUDI-6941 for stages number check (#9866)
---
 .../apache/spark/sql/hudi/TestInsertTable.scala    | 92 ++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
index 0bd67d785d2..579ab226c4d 100644
--- 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
+++ 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
@@ -1968,6 +1968,98 @@ class TestInsertTable extends HoodieSparkSqlTestBase {
     })
   }
 
+  test("Test multiple partition fields pruning") {
+
+    withRecordType()(withTempDir { tmp =>
+      val targetTable = generateTableName
+      spark.sql(
+        s"""
+           |create table ${targetTable} (
+           |  `id` string,
+           |  `name` string,
+           |  `dt` bigint,
+           |  `day` STRING,
+           |  `hour` INT
+           |) using hudi
+           |tblproperties (
+           |  'primaryKey' = 'id',
+           |  'type' = 'mor',
+           |  'preCombineField'='dt',
+           |  'hoodie.index.type' = 'BUCKET',
+           |  'hoodie.bucket.index.hash.field' = 'id',
+           |  'hoodie.bucket.index.num.buckets'=512
+           | )
+           |partitioned by (`day`,`hour`)
+           |location '${tmp.getCanonicalPath}/$targetTable'
+           |""".stripMargin)
+      spark.sql(
+        s"""
+           |insert into ${targetTable}
+           |select '1' as id, 'aa' as name, 123 as dt, '2023-10-12' as `day`, 
10 as `hour`
+           |union
+           |select '1' as id, 'aa' as name, 123 as dt, '2023-10-12' as `day`, 
11 as `hour`
+           |union
+           |select '1' as id, 'aa' as name, 123 as dt, '2023-10-12' as `day`, 
12 as `hour`
+           |""".stripMargin)
+      val df = spark.sql(
+        s"""
+           |select * from ${targetTable} where day='2023-10-12' and hour=11;
+           |""".stripMargin)
+      var rddHead = df.rdd
+      while (rddHead.dependencies.size > 0) {
+        assertResult(1)(rddHead.partitions.size)
+        rddHead = rddHead.firstParent
+      }
+      assertResult(1)(rddHead.partitions.size)
+    })
+  }
+
+  test("Test single partiton field pruning") {
+
+    withRecordType()(withTempDir { tmp =>
+      val targetTable = generateTableName
+      spark.sql(
+        s"""
+           |create table ${targetTable} (
+           |  `id` string,
+           |  `name` string,
+           |  `dt` bigint,
+           |  `day` STRING,
+           |  `hour` INT
+           |) using hudi
+           |tblproperties (
+           |  'primaryKey' = 'id',
+           |  'type' = 'mor',
+           |  'preCombineField'='dt',
+           |  'hoodie.index.type' = 'BUCKET',
+           |  'hoodie.bucket.index.hash.field' = 'id',
+           |  'hoodie.bucket.index.num.buckets'=512
+           | )
+           |partitioned by (`day`)
+           |location '${tmp.getCanonicalPath}/$targetTable'
+           |""".stripMargin)
+      spark.sql(
+        s"""
+           |insert into ${targetTable}
+           |select '1' as id, 'aa' as name, 123 as dt, '2023-10-12' as `day`, 
10 as `hour`
+           |union
+           |select '1' as id, 'aa' as name, 123 as dt, '2023-10-12' as `day`, 
11 as `hour`
+           |union
+           |select '1' as id, 'aa' as name, 123 as dt, '2023-10-12' as `day`, 
12 as `hour`
+           |""".stripMargin)
+      val df = spark.sql(
+        s"""
+           |select * from ${targetTable} where day='2023-10-12' and hour=11;
+           |""".stripMargin)
+      var rddHead = df.rdd
+      while (rddHead.dependencies.size > 0) {
+        assertResult(1)(rddHead.partitions.size)
+        rddHead = rddHead.firstParent
+      }
+      assertResult(1)(rddHead.partitions.size)
+    })
+  }
+
   def ingestAndValidateDataNoPrecombine(tableType: String, tableName: String, 
tmp: File,
                             expectedOperationtype: WriteOperationType,
                             setOptions: List[String] = List.empty) : Unit = {

[hudi] branch master updated: [HUDI-6941] Add unit test for HUDI-6941 for stages number check (#9866)

Reply via email to