This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 86a683a3a [VL] Minor improvements and fixes for gluten-it and gluten-te
86a683a3a is described below
commit 86a683a3a12f6ced5a3429c716a22baa39610421
Author: Hongze Zhang <[email protected]>
AuthorDate: Tue Jul 16 15:17:27 2024 +0900
[VL] Minor improvements and fixes for gluten-it and gluten-te
---
.../main/java/org/apache/gluten/integration/BaseMixin.java | 9 ++++++---
.../gluten/integration/clickbench/ClickBenchSuite.scala | 7 +++++--
.../scala/org/apache/gluten/integration/ds/TpcdsSuite.scala | 13 ++++++-------
.../scala/org/apache/gluten/integration/h/TpchSuite.scala | 9 ++++++---
.../examples/buildhere-veloxbe-portable-libs/scripts/all.sh | 5 +++--
5 files changed, 26 insertions(+), 17 deletions(-)
diff --git a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
index 93c82a6fa..47aa0a0cb 100644
--- a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
+++ b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
@@ -48,6 +48,9 @@ public class BaseMixin {
@CommandLine.Option(names = {"--error-on-memleak"}, description = "Fail the test when memory leak is detected by Spark's memory manager", defaultValue = "false")
private boolean errorOnMemLeak;
+ @CommandLine.Option(names = {"--data-dir"}, description = "Location for storing data used by tests", defaultValue = "/tmp")
+ private String dataDir;
+
@CommandLine.Option(names = {"--enable-ui"}, description = "Enable Spark UI", defaultValue = "false")
private boolean enableUi;
@@ -129,19 +132,19 @@ public class BaseMixin {
switch (benchmarkType) {
case "h":
suite = new TpchSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
- baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+ baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
disableWscg, shufflePartitions, scanPartitions);
break;
case "ds":
suite = new TpcdsSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
- baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+ baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
disableWscg, shufflePartitions, scanPartitions);
break;
case "clickbench":
suite = new ClickBenchSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
- baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+ baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
disableWscg, shufflePartitions, scanPartitions);
break;
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
index 04a34d332..f75431941 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
@@ -21,6 +21,8 @@ import org.apache.gluten.integration.{DataGen, Suite, TableCreator}
import org.apache.log4j.Level
import org.apache.spark.SparkConf
+import java.io.File
+
/**
* ClickBench: a Benchmark For Analytical Databases
*
@@ -35,6 +37,7 @@ class ClickBenchSuite(
val extraSparkConf: Map[String, String],
val logLevel: Level,
val errorOnMemLeak: Boolean,
+ val dataDir: String,
val enableUi: Boolean,
val enableHsUi: Boolean,
val hsUiPort: Int,
@@ -69,7 +72,7 @@ class ClickBenchSuite(
scale: Double,
genPartitionedData: Boolean): String = {
checkDataGenArgs(scale, genPartitionedData)
- DATA_WRITE_PATH
+ new File(dataDir).toPath.resolve(DATA_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
}
override private[integration] def createDataGen(
@@ -88,7 +91,7 @@ class ClickBenchSuite(
}
private object ClickBenchSuite {
- private val DATA_WRITE_PATH = "/tmp/clickbench-generated"
+ private val DATA_WRITE_RELATIVE_PATH = "clickbench-generated"
private val HISTORY_WRITE_PATH = "/tmp/clickbench-history"
private val ALL_QUERY_IDS = (1 to 43).map(i => s"q$i").toArray
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
index a4365afde..190623614 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
@@ -17,15 +17,13 @@
package org.apache.gluten.integration.ds
import org.apache.gluten.integration.action.Action
-import org.apache.gluten.integration.ds.TpcdsSuite.{
- ALL_QUERY_IDS,
- HISTORY_WRITE_PATH,
- TPCDS_WRITE_PATH
-}
+import org.apache.gluten.integration.ds.TpcdsSuite.{ALL_QUERY_IDS, HISTORY_WRITE_PATH, TPCDS_WRITE_RELATIVE_PATH}
import org.apache.gluten.integration.{DataGen, Suite, TableCreator, TypeModifier}
import org.apache.log4j.Level
import org.apache.spark.SparkConf
+import java.io.File
+
class TpcdsSuite(
val masterUrl: String,
val actions: Array[Action],
@@ -34,6 +32,7 @@ class TpcdsSuite(
val extraSparkConf: Map[String, String],
val logLevel: Level,
val errorOnMemLeak: Boolean,
+ val dataDir: String,
val enableUi: Boolean,
val enableHsUi: Boolean,
val hsUiPort: Int,
@@ -66,7 +65,7 @@ class TpcdsSuite(
override private[integration] def dataWritePath(
scale: Double,
genPartitionedData: Boolean): String =
- TPCDS_WRITE_PATH + s"-$scale"
+ new File(dataDir).toPath.resolve(TPCDS_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
override private[integration] def createDataGen(
scale: Double,
@@ -95,7 +94,7 @@ class TpcdsSuite(
}
object TpcdsSuite {
- private val TPCDS_WRITE_PATH = "/tmp/tpcds-generated"
+ private val TPCDS_WRITE_RELATIVE_PATH = "tpcds-generated"
private val ALL_QUERY_IDS = Array(
"q1",
"q2",
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
index bdcac3bff..86fcaea0a 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
@@ -17,11 +17,13 @@
package org.apache.gluten.integration.h
import org.apache.gluten.integration.action.Action
-import org.apache.gluten.integration.h.TpchSuite.{HISTORY_WRITE_PATH, TPCH_WRITE_PATH}
+import org.apache.gluten.integration.h.TpchSuite.{HISTORY_WRITE_PATH, TPCH_WRITE_RELATIVE_PATH}
import org.apache.gluten.integration.{DataGen, Suite, TableCreator, TypeModifier}
import org.apache.log4j.Level
import org.apache.spark.SparkConf
+import java.io.File
+
class TpchSuite(
val masterUrl: String,
val actions: Array[Action],
@@ -30,6 +32,7 @@ class TpchSuite(
val extraSparkConf: Map[String, String],
val logLevel: Level,
val errorOnMemLeak: Boolean,
+ val dataDir: String,
val enableUi: Boolean,
val enableHsUi: Boolean,
val hsUiPort: Int,
@@ -62,7 +65,7 @@ class TpchSuite(
override private[integration] def dataWritePath(
scale: Double,
genPartitionedData: Boolean): String =
- TPCH_WRITE_PATH + s"-$scale"
+ new File(dataDir).toPath.resolve(TPCH_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
override private[integration] def createDataGen(
scale: Double,
@@ -90,7 +93,7 @@ class TpchSuite(
}
object TpchSuite {
- private val TPCH_WRITE_PATH = "/tmp/tpch-generated"
+ private val TPCH_WRITE_RELATIVE_PATH = "tpch-generated"
private val ALL_QUERY_IDS = Array(
"q1",
"q2",
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
index 9fe627b99..267423552 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
@@ -19,7 +19,7 @@ set -ex
export NUM_THREADS=$(nproc)
export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
-# Retry code that copied from https://unix.stackexchange.com/a/137639.
+# Retry code copied from https://unix.stackexchange.com/a/137639.
function fail {
echo $1 >&2
exit 1
@@ -43,6 +43,7 @@ function retry {
}
cd /opt/gluten
-retry apt-get install curl zip unzip tar pkg-config autoconf-archive bison flex
+retry apt-get update
+retry apt-get install -y curl zip unzip tar pkg-config autoconf-archive bison flex
retry source ./dev/vcpkg/env.sh
retry dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]