This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 785958e0d [VL] Gluten-it: Add option --scan-partitions (#5958)
785958e0d is described below

commit 785958e0d3693cf7529237119a6c918d8639a833
Author: Hongze Zhang <[email protected]>
AuthorDate: Tue Jun 4 10:42:49 2024 +0800

    [VL] Gluten-it: Add option --scan-partitions (#5958)
---
 .../org/apache/gluten/integration/BaseMixin.java    | 12 ++++++------
 .../scala/org/apache/gluten/integration/Suite.scala | 21 +++++++++++----------
 .../integration/clickbench/ClickBenchSuite.scala    |  4 ++--
 .../apache/gluten/integration/ds/TpcdsSuite.scala   |  4 ++--
 .../org/apache/gluten/integration/h/TpchSuite.scala |  4 ++--
 5 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
index 41d244871..dc1691e50 100644
--- a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
+++ b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
@@ -72,11 +72,11 @@ public class BaseMixin {
   @CommandLine.Option(names = {"--disable-wscg"}, description = "Disable Spark 
SQL whole stage code generation", defaultValue = "false")
   private boolean disableWscg;
 
-  @CommandLine.Option(names = {"--shuffle-partitions"}, description = 
"Generate data with partitions", defaultValue = "100")
+  @CommandLine.Option(names = {"--shuffle-partitions"}, description = "Shuffle 
partition number", defaultValue = "100")
   private int shufflePartitions;
 
-  @CommandLine.Option(names = {"--min-scan-partitions"}, description = "Use 
minimum number of partitions to read data", defaultValue = "false")
-  private boolean minimumScanPartitions;
+  @CommandLine.Option(names = {"--scan-partitions"}, description = "Scan 
partition number. This is an approximate value, so the actual scan partition 
number might vary around this value", defaultValue = "100")
+  private int scanPartitions;
 
   @CommandLine.Option(names = {"--extra-conf"}, description = "Extra Spark 
config entries applying to generated Spark session. E.g. --extra-conf=k1=v1 
--extra-conf=k2=v2")
   private Map<String, String> extraSparkConf = Collections.emptyMap();
@@ -131,19 +131,19 @@ public class BaseMixin {
         suite = new TpchSuite(runModeEnumeration.getSparkMasterUrl(), actions, 
testConf,
             baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
             enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
-            disableWscg, shufflePartitions, minimumScanPartitions);
+            disableWscg, shufflePartitions, scanPartitions);
         break;
       case "ds":
         suite = new TpcdsSuite(runModeEnumeration.getSparkMasterUrl(), 
actions, testConf,
             baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
             enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
-            disableWscg, shufflePartitions, minimumScanPartitions);
+            disableWscg, shufflePartitions, scanPartitions);
         break;
       case "clickbench":
         suite = new ClickBenchSuite(runModeEnumeration.getSparkMasterUrl(), 
actions, testConf,
             baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
             enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
-            disableWscg, shufflePartitions, minimumScanPartitions);
+            disableWscg, shufflePartitions, scanPartitions);
         break;
       default:
         throw new IllegalArgumentException("TPC benchmark type not found: " + 
benchmarkType);
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
index 9e31e1171..bb5cb1889 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
@@ -43,7 +43,7 @@ abstract class Suite(
     private val disableBhj: Boolean,
     private val disableWscg: Boolean,
     private val shufflePartitions: Int,
-    private val minimumScanPartitions: Boolean) {
+    private val scanPartitions: Int) {
 
   resetLogLevel()
 
@@ -103,15 +103,16 @@ abstract class Suite(
     
sessionSwitcher.defaultConf().setWarningOnOverriding("spark.sql.codegen.wholeStage",
 "false")
   }
 
-  if (minimumScanPartitions) {
-    sessionSwitcher
-      .defaultConf()
-      .setWarningOnOverriding("spark.sql.files.maxPartitionBytes", 
s"${ByteUnit.PiB.toBytes(1L)}")
-    sessionSwitcher
-      .defaultConf()
-      .setWarningOnOverriding("spark.sql.files.openCostInBytes", 
s"${ByteUnit.PiB.toBytes(1L)}")
-    
sessionSwitcher.defaultConf().setWarningOnOverriding("spark.default.parallelism",
 "1")
-  }
+  // Scan partition number.
+  sessionSwitcher
+    .defaultConf()
+    .setWarningOnOverriding("spark.sql.files.maxPartitionBytes", 
s"${ByteUnit.PiB.toBytes(1L)}")
+  sessionSwitcher
+    .defaultConf()
+    .setWarningOnOverriding("spark.sql.files.openCostInBytes", "0")
+  sessionSwitcher
+    .defaultConf()
+    .setWarningOnOverriding("spark.sql.files.minPartitionNum", 
s"${(scanPartitions - 1) max 1}")
 
   extraSparkConf.toStream.foreach { kv =>
     sessionSwitcher.defaultConf().setWarningOnOverriding(kv._1, kv._2)
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
index deffdb7e5..04a34d332 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
@@ -43,7 +43,7 @@ class ClickBenchSuite(
     val disableBhj: Boolean,
     val disableWscg: Boolean,
     val shufflePartitions: Int,
-    val minimumScanPartitions: Boolean)
+    val scanPartitions: Int)
     extends Suite(
       masterUrl,
       actions,
@@ -60,7 +60,7 @@ class ClickBenchSuite(
       disableBhj,
       disableWscg,
       shufflePartitions,
-      minimumScanPartitions) {
+      scanPartitions) {
   import ClickBenchSuite._
 
   override protected def historyWritePath(): String = HISTORY_WRITE_PATH
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
index 339e89d5b..a4365afde 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
@@ -42,7 +42,7 @@ class TpcdsSuite(
     val disableBhj: Boolean,
     val disableWscg: Boolean,
     val shufflePartitions: Int,
-    val minimumScanPartitions: Boolean)
+    val scanPartitions: Int)
     extends Suite(
       masterUrl,
       actions,
@@ -59,7 +59,7 @@ class TpcdsSuite(
       disableBhj,
       disableWscg,
       shufflePartitions,
-      minimumScanPartitions) {
+      scanPartitions) {
 
   override protected def historyWritePath(): String = HISTORY_WRITE_PATH
 
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
index 29c299bee..bdcac3bff 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
@@ -38,7 +38,7 @@ class TpchSuite(
     val disableBhj: Boolean,
     val disableWscg: Boolean,
     val shufflePartitions: Int,
-    val minimumScanPartitions: Boolean)
+    val scanPartitions: Int)
     extends Suite(
       masterUrl,
       actions,
@@ -55,7 +55,7 @@ class TpchSuite(
       disableBhj,
       disableWscg,
       shufflePartitions,
-      minimumScanPartitions) {
+      scanPartitions) {
 
   override protected def historyWritePath(): String = HISTORY_WRITE_PATH
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to