This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new fd06368f3 [VL] Daily Update Velox Version (2024_04_12) (#5375)
fd06368f3 is described below
commit fd06368f3f1b3ac0495b22129e2562ada842da6b
Author: Gluten Performance Bot
<[email protected]>
AuthorDate: Fri Apr 12 19:23:03 2024 +0800
[VL] Daily Update Velox Version (2024_04_12) (#5375)
Signed-off-by: glutenperfbot <[email protected]>
Co-authored-by: glutenperfbot <[email protected]>
Co-authored-by: Hongze Zhang <[email protected]>
---
.github/workflows/velox_docker.yml | 3 ++-
ep/build-velox/src/get_velox.sh | 2 +-
.../scala/org/apache/gluten/GlutenConfig.scala | 9 +++++++
.../integration/tpc/command/Parameterized.java | 28 ++++++++++++++++------
.../integration/tpc/action/Parameterized.scala | 17 +++++++++++++
5 files changed, 50 insertions(+), 9 deletions(-)
diff --git a/.github/workflows/velox_docker.yml
b/.github/workflows/velox_docker.yml
index 07960813e..8515d79d2 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -245,7 +245,8 @@ jobs:
-d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
-d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
-d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
- -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5
+ -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 \
+ --excluded-dims=OFFHEAP_SIZE:4g
- name: TPC-DS SF30.0 Parquet local spark3.2 Q67 low memory, memory isolation on
run: |
cd tools/gluten-it \
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index fa6deb374..32e33ed48 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
set -exu
VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_04_11
+VELOX_BRANCH=2024_04_12
VELOX_HOME=""
#Set on run gluten on HDFS
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index bc98ea25e..cfaf54cdd 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -256,6 +256,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {
def veloxSpillStrategy: String = conf.getConf(COLUMNAR_VELOX_SPILL_STRATEGY)
+ def veloxMaxSpillLevel: Int = conf.getConf(COLUMNAR_VELOX_MAX_SPILL_LEVEL)
+
def veloxMaxSpillFileSize: Long =
conf.getConf(COLUMNAR_VELOX_MAX_SPILL_FILE_SIZE)
def veloxSpillFileSystem: String =
conf.getConf(COLUMNAR_VELOX_SPILL_FILE_SYSTEM)
@@ -1249,6 +1251,13 @@ object GlutenConfig {
.checkValues(Set("none", "auto"))
.createWithDefault("auto")
+ val COLUMNAR_VELOX_MAX_SPILL_LEVEL =
+ buildConf("spark.gluten.sql.columnar.backend.velox.maxSpillLevel")
+ .internal()
+ .doc("The max allowed spilling level with zero being the initial spilling level")
+ .intConf
+ .createWithDefault(4)
+
val COLUMNAR_VELOX_MAX_SPILL_FILE_SIZE =
buildConf("spark.gluten.sql.columnar.backend.velox.maxSpillFileSize")
.internal()
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
index 7c9c64399..1f94cb256 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
@@ -16,8 +16,10 @@
*/
package org.apache.gluten.integration.tpc.command;
+import com.google.common.base.Preconditions;
import org.apache.gluten.integration.tpc.TpcMixin;
import org.apache.gluten.integration.tpc.action.Dim;
+import org.apache.gluten.integration.tpc.action.DimKv;
import org.apache.gluten.integration.tpc.action.DimValue;
import org.apache.commons.lang3.ArrayUtils;
import picocli.CommandLine;
@@ -25,12 +27,7 @@ import scala.Tuple2;
import scala.collection.JavaConverters;
import scala.collection.Seq;
-import java.util.AbstractMap;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import java.util.concurrent.Callable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -64,13 +61,30 @@ public class Parameterized implements Callable<Integer> {
@CommandLine.Option(names = {"-d", "--dim"}, description = "Set a series of
dimensions consisting of possible config options, example:
-d=offheap:1g,spark.memory.offHeap.enabled=true,spark.memory.offHeap.size=1g")
private String[] dims = new String[0];
+ @CommandLine.Option(names = {"--excluded-dims"}, description = "Set a comma-separated list of dimension combinations to exclude. Example: --excluded-dims=offheap:1g,aqe:on")
+ private String[] excludedDims = new String[0];
+
private static final Pattern dimPattern1 =
Pattern.compile("([\\w-]+):([^,:]+)((?:,[^=,]+=[^=,]+)+)");
private static final Pattern dimPattern2 =
Pattern.compile("([^,:]+)((?:,[^=,]+=[^=,]+)+)");
+ private static final Pattern excludedDimsPattern =
Pattern.compile("[\\w-]+:[^,:]+(?:,[\\w-]+:[^,:]+)*");
@Override
public Integer call() throws Exception {
final Map<String, Map<String, List<Map.Entry<String, String>>>> parsed =
new HashMap<>();
+ final Seq<scala.collection.immutable.Set<DimKv>> excludedCombinations =
JavaConverters.asScalaBufferConverter(Arrays.stream(excludedDims).map(d -> {
+ final Matcher m = excludedDimsPattern.matcher(d);
+ Preconditions.checkArgument(m.matches(), "Unrecognizable excluded dims:
" + d);
+ Set<DimKv> out = new HashSet<>();
+ final String[] dims = d.split(",");
+ for (String dim : dims) {
+ final String[] kv = dim.split(":");
+ Preconditions.checkArgument(kv.length == 2, "Unrecognizable excluded
dims: " + d);
+ out.add(new DimKv(kv[0], kv[1]));
+ }
+ return JavaConverters.asScalaSetConverter(out).asScala().<DimKv>toSet();
+ }).collect(Collectors.toList())).asScala();
+
// parse dims
for (String dim : dims) {
Matcher matcher1 = dimPattern1.matcher(dim);
@@ -122,7 +136,7 @@ public class Parameterized implements Callable<Integer> {
)).collect(Collectors.toList())).asScala();
org.apache.gluten.integration.tpc.action.Parameterized parameterized =
- new
org.apache.gluten.integration.tpc.action.Parameterized(dataGenMixin.getScale(),
this.queries, excludedQueries, iterations, warmupIterations, parsedDims,
metrics);
+ new
org.apache.gluten.integration.tpc.action.Parameterized(dataGenMixin.getScale(),
this.queries, excludedQueries, iterations, warmupIterations, parsedDims,
excludedCombinations, metrics);
return mixin.runActions(ArrayUtils.addAll(dataGenMixin.makeActions(),
parameterized));
}
}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
index c1a2a1085..0842eb80e 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
@@ -35,6 +35,7 @@ class Parameterized(
iterations: Int,
warmupIterations: Int,
configDimensions: Seq[Dim],
+ excludedCombinations: Seq[Set[DimKv]],
metrics: Array[String])
extends Action {
@@ -70,6 +71,16 @@ class Parameterized(
intermediateConf: Seq[(String, String)]): Unit = {
if (dimOffset == dimCount) {
// we got one coordinate
+ excludedCombinations.foreach {
+ ec: Set[DimKv] =>
+ if (ec.forall {
+ kv =>
+ intermediateCoordinates.contains(kv.k) &&
intermediateCoordinates(kv.k) == kv.v
+ }) {
+ println(s"Coordinate ${Coordinate(intermediateCoordinates)}
excluded by $ec.")
+ return
+ }
+ }
coordinateMap(Coordinate(intermediateCoordinates)) = intermediateConf
return
}
@@ -95,6 +106,11 @@ class Parameterized(
val sessionSwitcher = tpcSuite.sessionSwitcher
val testConf = tpcSuite.getTestConf()
+ println("Prepared coordinates: ")
+ coordinates.toList.map(_._1).zipWithIndex.foreach {
+ case (c, idx) =>
+ println(s" $idx: $c")
+ }
coordinates.foreach {
entry =>
// register one session per coordinate
@@ -176,6 +192,7 @@ class Parameterized(
}
}
+case class DimKv(k: String, v: String)
case class Dim(name: String, dimValues: Seq[DimValue])
case class DimValue(name: String, conf: Seq[(String, String)])
case class Coordinate(coordinate: Map[String, String]) // [dim, dim value]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]