This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new fd06368f3 [VL] Daily Update Velox Version (2024_04_12) (#5375)
fd06368f3 is described below

commit fd06368f3f1b3ac0495b22129e2562ada842da6b
Author: Gluten Performance Bot 
<[email protected]>
AuthorDate: Fri Apr 12 19:23:03 2024 +0800

    [VL] Daily Update Velox Version (2024_04_12) (#5375)
    
    Signed-off-by: glutenperfbot <[email protected]>
    Co-authored-by: glutenperfbot <[email protected]>
    Co-authored-by: Hongze Zhang <[email protected]>
---
 .github/workflows/velox_docker.yml                 |  3 ++-
 ep/build-velox/src/get_velox.sh                    |  2 +-
 .../scala/org/apache/gluten/GlutenConfig.scala     |  9 +++++++
 .../integration/tpc/command/Parameterized.java     | 28 ++++++++++++++++------
 .../integration/tpc/action/Parameterized.scala     | 17 +++++++++++++
 5 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/velox_docker.yml 
b/.github/workflows/velox_docker.yml
index 07960813e..8515d79d2 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -245,7 +245,8 @@ jobs:
             -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
             -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
             
-d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
-            -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5
+            
-d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 \
+            --excluded-dims=OFFHEAP_SIZE:4g
       - name: TPC-DS SF30.0 Parquet local spark3.2 Q67 low memory, memory 
isolation on
         run: |
           cd tools/gluten-it \
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index fa6deb374..32e33ed48 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_04_11
+VELOX_BRANCH=2024_04_12
 VELOX_HOME=""
 
 #Set on run gluten on HDFS
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala 
b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index bc98ea25e..cfaf54cdd 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -256,6 +256,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {
 
   def veloxSpillStrategy: String = conf.getConf(COLUMNAR_VELOX_SPILL_STRATEGY)
 
+  def veloxMaxSpillLevel: Int = conf.getConf(COLUMNAR_VELOX_MAX_SPILL_LEVEL)
+
   def veloxMaxSpillFileSize: Long = 
conf.getConf(COLUMNAR_VELOX_MAX_SPILL_FILE_SIZE)
 
   def veloxSpillFileSystem: String = 
conf.getConf(COLUMNAR_VELOX_SPILL_FILE_SYSTEM)
@@ -1249,6 +1251,13 @@ object GlutenConfig {
       .checkValues(Set("none", "auto"))
       .createWithDefault("auto")
 
+  val COLUMNAR_VELOX_MAX_SPILL_LEVEL =
+    buildConf("spark.gluten.sql.columnar.backend.velox.maxSpillLevel")
+      .internal()
+      .doc("The max allowed spilling level, with zero being the initial 
spilling level")
+      .intConf
+      .createWithDefault(4)
+
   val COLUMNAR_VELOX_MAX_SPILL_FILE_SIZE =
     buildConf("spark.gluten.sql.columnar.backend.velox.maxSpillFileSize")
       .internal()
diff --git 
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
 
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
index 7c9c64399..1f94cb256 100644
--- 
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
+++ 
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
@@ -16,8 +16,10 @@
  */
 package org.apache.gluten.integration.tpc.command;
 
+import com.google.common.base.Preconditions;
 import org.apache.gluten.integration.tpc.TpcMixin;
 import org.apache.gluten.integration.tpc.action.Dim;
+import org.apache.gluten.integration.tpc.action.DimKv;
 import org.apache.gluten.integration.tpc.action.DimValue;
 import org.apache.commons.lang3.ArrayUtils;
 import picocli.CommandLine;
@@ -25,12 +27,7 @@ import scala.Tuple2;
 import scala.collection.JavaConverters;
 import scala.collection.Seq;
 
-import java.util.AbstractMap;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.concurrent.Callable;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -64,13 +61,30 @@ public class Parameterized implements Callable<Integer> {
   @CommandLine.Option(names = {"-d", "--dim"}, description = "Set a series of 
dimensions consisting of possible config options, example: 
-d=offheap:1g,spark.memory.offHeap.enabled=true,spark.memory.offHeap.size=1g")
   private String[] dims = new String[0];
 
+  @CommandLine.Option(names = {"--excluded-dims"}, description = "Set a 
comma-separated list of dimension combinations to exclude. Example: 
--excluded-dims=offheap:1g,aqe:on")
+  private String[] excludedDims = new String[0];
+
   private static final Pattern dimPattern1 = 
Pattern.compile("([\\w-]+):([^,:]+)((?:,[^=,]+=[^=,]+)+)");
   private static final Pattern dimPattern2 = 
Pattern.compile("([^,:]+)((?:,[^=,]+=[^=,]+)+)");
 
+  private static final Pattern excludedDimsPattern = 
Pattern.compile("[\\w-]+:[^,:]+(?:,[\\w-]+:[^,:]+)*");
   @Override
   public Integer call() throws Exception {
     final Map<String, Map<String, List<Map.Entry<String, String>>>> parsed = 
new HashMap<>();
 
+    final Seq<scala.collection.immutable.Set<DimKv>> excludedCombinations = 
JavaConverters.asScalaBufferConverter(Arrays.stream(excludedDims).map(d -> {
+      final Matcher m = excludedDimsPattern.matcher(d);
+      Preconditions.checkArgument(m.matches(), "Unrecognizable excluded dims: 
" + d);
+      Set<DimKv> out = new HashSet<>();
+      final String[] dims = d.split(",");
+      for (String dim : dims) {
+        final String[] kv = dim.split(":");
+        Preconditions.checkArgument(kv.length == 2, "Unrecognizable excluded 
dims: " + d);
+        out.add(new DimKv(kv[0], kv[1]));
+      }
+      return JavaConverters.asScalaSetConverter(out).asScala().<DimKv>toSet();
+    }).collect(Collectors.toList())).asScala();
+
     // parse dims
     for (String dim : dims) {
       Matcher matcher1 = dimPattern1.matcher(dim);
@@ -122,7 +136,7 @@ public class Parameterized implements Callable<Integer> {
             )).collect(Collectors.toList())).asScala();
 
     org.apache.gluten.integration.tpc.action.Parameterized parameterized =
-        new 
org.apache.gluten.integration.tpc.action.Parameterized(dataGenMixin.getScale(), 
this.queries, excludedQueries, iterations, warmupIterations, parsedDims, 
metrics);
+        new 
org.apache.gluten.integration.tpc.action.Parameterized(dataGenMixin.getScale(), 
this.queries, excludedQueries, iterations, warmupIterations, parsedDims, 
excludedCombinations, metrics);
     return mixin.runActions(ArrayUtils.addAll(dataGenMixin.makeActions(), 
parameterized));
   }
 }
diff --git 
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
 
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
index c1a2a1085..0842eb80e 100644
--- 
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
+++ 
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
@@ -35,6 +35,7 @@ class Parameterized(
     iterations: Int,
     warmupIterations: Int,
     configDimensions: Seq[Dim],
+    excludedCombinations: Seq[Set[DimKv]],
     metrics: Array[String])
   extends Action {
 
@@ -70,6 +71,16 @@ class Parameterized(
         intermediateConf: Seq[(String, String)]): Unit = {
       if (dimOffset == dimCount) {
         // we got one coordinate
+        excludedCombinations.foreach {
+          ec: Set[DimKv] =>
+            if (ec.forall {
+              kv =>
+                intermediateCoordinates.contains(kv.k) && 
intermediateCoordinates(kv.k) == kv.v
+            }) {
+              println(s"Coordinate ${Coordinate(intermediateCoordinates)} 
excluded by $ec.")
+              return
+            }
+        }
         coordinateMap(Coordinate(intermediateCoordinates)) = intermediateConf
         return
       }
@@ -95,6 +106,11 @@ class Parameterized(
     val sessionSwitcher = tpcSuite.sessionSwitcher
     val testConf = tpcSuite.getTestConf()
 
+    println("Prepared coordinates: ")
+    coordinates.toList.map(_._1).zipWithIndex.foreach {
+      case (c, idx) =>
+        println(s"  $idx: $c")
+    }
     coordinates.foreach {
       entry =>
         // register one session per coordinate
@@ -176,6 +192,7 @@ class Parameterized(
   }
 }
 
+case class DimKv(k: String, v: String)
 case class Dim(name: String, dimValues: Seq[DimValue])
 case class DimValue(name: String, conf: Seq[(String, String)])
 case class Coordinate(coordinate: Map[String, String]) // [dim, dim value]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to