This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 86a683a3a [VL] Minor improvements and fixes for gluten-it and gluten-te
86a683a3a is described below

commit 86a683a3a12f6ced5a3429c716a22baa39610421
Author: Hongze Zhang <[email protected]>
AuthorDate: Tue Jul 16 15:17:27 2024 +0900

    [VL] Minor improvements and fixes for gluten-it and gluten-te
---
 .../main/java/org/apache/gluten/integration/BaseMixin.java  |  9 ++++++---
 .../gluten/integration/clickbench/ClickBenchSuite.scala     |  7 +++++--
 .../scala/org/apache/gluten/integration/ds/TpcdsSuite.scala | 13 ++++++-------
 .../scala/org/apache/gluten/integration/h/TpchSuite.scala   |  9 ++++++---
 .../examples/buildhere-veloxbe-portable-libs/scripts/all.sh |  5 +++--
 5 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
index 93c82a6fa..47aa0a0cb 100644
--- a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
+++ b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
@@ -48,6 +48,9 @@ public class BaseMixin {
   @CommandLine.Option(names = {"--error-on-memleak"}, description = "Fail the test when memory leak is detected by Spark's memory manager", defaultValue = "false")
   private boolean errorOnMemLeak;
 
+  @CommandLine.Option(names = {"--data-dir"}, description = "Location for storing data used by tests", defaultValue = "/tmp")
+  private String dataDir;
+
   @CommandLine.Option(names = {"--enable-ui"}, description = "Enable Spark UI", defaultValue = "false")
   private boolean enableUi;
 
@@ -129,19 +132,19 @@ public class BaseMixin {
     switch (benchmarkType) {
       case "h":
         suite = new TpchSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
-            baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+            baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
             enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
             disableWscg, shufflePartitions, scanPartitions);
         break;
       case "ds":
         suite = new TpcdsSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
-            baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+            baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
             enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
             disableWscg, shufflePartitions, scanPartitions);
         break;
       case "clickbench":
         suite = new ClickBenchSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
-            baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+            baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
             enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
             disableWscg, shufflePartitions, scanPartitions);
         break;
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
index 04a34d332..f75431941 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
@@ -21,6 +21,8 @@ import org.apache.gluten.integration.{DataGen, Suite, TableCreator}
 import org.apache.log4j.Level
 import org.apache.spark.SparkConf
 
+import java.io.File
+
 /**
  * ClickBench: a Benchmark For Analytical Databases
  *
@@ -35,6 +37,7 @@ class ClickBenchSuite(
     val extraSparkConf: Map[String, String],
     val logLevel: Level,
     val errorOnMemLeak: Boolean,
+    val dataDir: String,
     val enableUi: Boolean,
     val enableHsUi: Boolean,
     val hsUiPort: Int,
@@ -69,7 +72,7 @@ class ClickBenchSuite(
       scale: Double,
       genPartitionedData: Boolean): String = {
     checkDataGenArgs(scale, genPartitionedData)
-    DATA_WRITE_PATH
+    new File(dataDir).toPath.resolve(DATA_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
   }
 
   override private[integration] def createDataGen(
@@ -88,7 +91,7 @@ class ClickBenchSuite(
 }
 
 private object ClickBenchSuite {
-  private val DATA_WRITE_PATH = "/tmp/clickbench-generated"
+  private val DATA_WRITE_RELATIVE_PATH = "clickbench-generated"
   private val HISTORY_WRITE_PATH = "/tmp/clickbench-history"
   private val ALL_QUERY_IDS = (1 to 43).map(i => s"q$i").toArray
 
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
index a4365afde..190623614 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
@@ -17,15 +17,13 @@
 package org.apache.gluten.integration.ds
 
 import org.apache.gluten.integration.action.Action
-import org.apache.gluten.integration.ds.TpcdsSuite.{
-  ALL_QUERY_IDS,
-  HISTORY_WRITE_PATH,
-  TPCDS_WRITE_PATH
-}
+import org.apache.gluten.integration.ds.TpcdsSuite.{ALL_QUERY_IDS, HISTORY_WRITE_PATH, TPCDS_WRITE_RELATIVE_PATH}
 import org.apache.gluten.integration.{DataGen, Suite, TableCreator, TypeModifier}
 import org.apache.log4j.Level
 import org.apache.spark.SparkConf
 
+import java.io.File
+
 class TpcdsSuite(
     val masterUrl: String,
     val actions: Array[Action],
@@ -34,6 +32,7 @@ class TpcdsSuite(
     val extraSparkConf: Map[String, String],
     val logLevel: Level,
     val errorOnMemLeak: Boolean,
+    val dataDir: String,
     val enableUi: Boolean,
     val enableHsUi: Boolean,
     val hsUiPort: Int,
@@ -66,7 +65,7 @@ class TpcdsSuite(
   override private[integration] def dataWritePath(
       scale: Double,
       genPartitionedData: Boolean): String =
-    TPCDS_WRITE_PATH + s"-$scale"
+    new File(dataDir).toPath.resolve(TPCDS_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
 
   override private[integration] def createDataGen(
       scale: Double,
@@ -95,7 +94,7 @@ class TpcdsSuite(
 }
 
 object TpcdsSuite {
-  private val TPCDS_WRITE_PATH = "/tmp/tpcds-generated"
+  private val TPCDS_WRITE_RELATIVE_PATH = "tpcds-generated"
   private val ALL_QUERY_IDS = Array(
     "q1",
     "q2",
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
index bdcac3bff..86fcaea0a 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
@@ -17,11 +17,13 @@
 package org.apache.gluten.integration.h
 
 import org.apache.gluten.integration.action.Action
-import org.apache.gluten.integration.h.TpchSuite.{HISTORY_WRITE_PATH, TPCH_WRITE_PATH}
+import org.apache.gluten.integration.h.TpchSuite.{HISTORY_WRITE_PATH, TPCH_WRITE_RELATIVE_PATH}
 import org.apache.gluten.integration.{DataGen, Suite, TableCreator, TypeModifier}
 import org.apache.log4j.Level
 import org.apache.spark.SparkConf
 
+import java.io.File
+
 class TpchSuite(
     val masterUrl: String,
     val actions: Array[Action],
@@ -30,6 +32,7 @@ class TpchSuite(
     val extraSparkConf: Map[String, String],
     val logLevel: Level,
     val errorOnMemLeak: Boolean,
+    val dataDir: String,
     val enableUi: Boolean,
     val enableHsUi: Boolean,
     val hsUiPort: Int,
@@ -62,7 +65,7 @@ class TpchSuite(
   override private[integration] def dataWritePath(
       scale: Double,
       genPartitionedData: Boolean): String =
-    TPCH_WRITE_PATH + s"-$scale"
+    new File(dataDir).toPath.resolve(TPCH_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
 
   override private[integration] def createDataGen(
       scale: Double,
@@ -90,7 +93,7 @@ class TpchSuite(
 }
 
 object TpchSuite {
-  private val TPCH_WRITE_PATH = "/tmp/tpch-generated"
+  private val TPCH_WRITE_RELATIVE_PATH = "tpch-generated"
   private val ALL_QUERY_IDS = Array(
     "q1",
     "q2",
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
index 9fe627b99..267423552 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
@@ -19,7 +19,7 @@ set -ex
 export NUM_THREADS=$(nproc)
 export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
 
-# Retry code that copied from https://unix.stackexchange.com/a/137639.
+# Retry code copied from https://unix.stackexchange.com/a/137639.
 function fail {
   echo $1 >&2
   exit 1
@@ -43,6 +43,7 @@ function retry {
 }
 
 cd /opt/gluten
-retry apt-get install curl zip unzip tar pkg-config autoconf-archive bison flex
+retry apt-get update
+retry apt-get install -y curl zip unzip tar pkg-config autoconf-archive bison flex
 retry source ./dev/vcpkg/env.sh
 retry dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to