This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 48f481fcb [VL] CI: Add TPC-H / TPC-DS job at SF30 with Spark 3.4 
(#5490)
48f481fcb is described below

commit 48f481fcbdab0758f7645d856f6266cb417da9e9
Author: Hongze Zhang <[email protected]>
AuthorDate: Wed Apr 24 14:27:52 2024 +0800

    [VL] CI: Add TPC-H / TPC-DS job at SF30 with Spark 3.4 (#5490)
    
    Add a CI job for SF30 with Spark 3.4. This makes it possible to test for 
possible regressions at larger scale factors and on newer Spark versions, e.g., 
issues related to SHJ, runtime filters, etc.
---
 .github/workflows/velox_docker.yml                 | 51 ++++++++++++++++++++++
 .../sketch => gluten/utils}/VeloxBloomFilter.java  |  4 +-
 .../utils}/VeloxBloomFilterJniWrapper.java         |  2 +-
 .../backendsapi/velox/VeloxSparkPlanExecApi.scala  |  8 ++--
 .../expression}/VeloxBloomFilterMightContain.scala |  8 ++--
 .../aggregate/VeloxBloomFilterAggregate.scala      |  6 ++-
 .../BloomFilterMightContainJointRewriteRule.scala  |  6 +--
 .../utils}/VeloxBloomFilterTest.java               |  4 +-
 cpp/velox/jni/VeloxJniWrapper.cc                   | 12 ++---
 cpp/velox/symbols.map                              |  2 +-
 10 files changed, 82 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/velox_docker.yml 
b/.github/workflows/velox_docker.yml
index 271daf679..42e102ba7 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -339,6 +339,57 @@ jobs:
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
-s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 
\
             --skip-data-gen  --random-kill-tasks
 
+  run-tpc-test-ubuntu-sf30:
+    needs: build-native-lib
+    strategy:
+      fail-fast: false
+      matrix:
+        spark: [ "spark-3.4" ]
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Maximize build disk space
+        shell: bash
+        run: |
+          df -h
+          set -euo pipefail
+          echo "Removing unwanted software... "
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force > /dev/null
+          df -h
+      - uses: actions/checkout@v2
+      - name: Download All Artifacts
+        uses: actions/download-artifact@v2
+        with:
+          name: velox-native-lib-${{github.sha}}
+          path: ./cpp/build/releases
+      - name: Setup java and maven
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y openjdk-8-jdk maven
+      - name: Set environment variables
+        run: |
+          echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
+      - name: Build for Spark ${{ matrix.spark }}
+        run: |
+          cd $GITHUB_WORKSPACE/ 
+          mvn -ntp clean install -P${{ matrix.spark }} -Pbackends-velox 
-DskipTests
+          cd $GITHUB_WORKSPACE/tools/gluten-it
+          mvn -ntp clean install -P${{ matrix.spark }}
+          GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local 
--benchmark-type=h -s=30.0 --threads=12
+          GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local 
--benchmark-type=ds -s=30.0 --threads=12
+      - name: TPC-H / TPC-DS SF30.0 Parquet local spark3.4
+        run: |
+          cd tools/gluten-it \
+          && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
+            --local --preset=velox --benchmark-type=h --error-on-memleak 
-s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 
\
+            --skip-data-gen \
+          && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
+            --local --preset=velox --benchmark-type=ds --error-on-memleak 
-s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 
\
+            --skip-data-gen
+
   run-tpc-test-centos8-uniffle:
     needs: build-native-lib
     strategy:
diff --git 
a/backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilter.java
 b/backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilter.java
similarity index 96%
rename from 
backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilter.java
rename to 
backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilter.java
index 59716ed79..13ba8e011 100644
--- 
a/backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilter.java
+++ b/backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilter.java
@@ -14,9 +14,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.util.sketch;
+package org.apache.gluten.utils;
 
 import org.apache.commons.io.IOUtils;
+import org.apache.spark.util.sketch.BloomFilter;
+import org.apache.spark.util.sketch.IncompatibleMergeException;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
diff --git 
a/backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilterJniWrapper.java
 
b/backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilterJniWrapper.java
similarity index 97%
rename from 
backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilterJniWrapper.java
rename to 
backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilterJniWrapper.java
index 572e2c7ac..94f8e17bc 100644
--- 
a/backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilterJniWrapper.java
+++ 
b/backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilterJniWrapper.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.util.sketch;
+package org.apache.gluten.utils;
 
 import org.apache.gluten.exec.Runtime;
 import org.apache.gluten.exec.RuntimeAware;
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index c75a25e01..7463c6340 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -22,6 +22,8 @@ import org.apache.gluten.exception.GlutenNotSupportException
 import org.apache.gluten.execution._
 import org.apache.gluten.expression._
 import org.apache.gluten.expression.ConverterUtils.FunctionConfig
+import org.apache.gluten.expression.aggregate.VeloxBloomFilterAggregate
+import org.apache.gluten.extension.BloomFilterMightContainJointRewriteRule
 import org.apache.gluten.extension.columnar.TransformHints
 import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.gluten.substrait.expression.{ExpressionBuilder, 
ExpressionNode, IfThenNode}
@@ -33,12 +35,12 @@ import org.apache.spark.serializer.Serializer
 import org.apache.spark.shuffle.{GenShuffleWriterParameters, 
GlutenShuffleWriterWrapper}
 import org.apache.spark.shuffle.utils.ShuffleUtil
 import org.apache.spark.sql.{SparkSession, Strategy}
-import org.apache.spark.sql.catalyst.{AggregateFunctionRewriteRule, 
BloomFilterMightContainJointRewriteRule, FlushableHashAggregateRule, 
FunctionIdentifier}
+import org.apache.spark.sql.catalyst.{AggregateFunctionRewriteRule, 
FlushableHashAggregateRule, FunctionIdentifier}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
-import org.apache.spark.sql.catalyst.expressions.{Add, Alias, ArrayExists, 
ArrayFilter, ArrayForAll, ArrayTransform, Ascending, Attribute, Cast, 
CreateNamedStruct, ElementAt, Expression, ExpressionInfo, Generator, 
GetArrayItem, GetMapValue, GetStructField, If, IsNaN, LambdaFunction, Literal, 
Murmur3Hash, NamedExpression, NaNvl, PosExplode, Round, SortOrder, StringSplit, 
StringTrim, TryEval, Uuid, VeloxBloomFilterMightContain}
-import 
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, 
HLLAdapter, VeloxBloomFilterAggregate}
+import org.apache.spark.sql.catalyst.expressions.{Add, Alias, ArrayExists, 
ArrayFilter, ArrayForAll, ArrayTransform, Ascending, Attribute, Cast, 
CreateNamedStruct, ElementAt, Expression, ExpressionInfo, Generator, 
GetArrayItem, GetMapValue, GetStructField, If, IsNaN, LambdaFunction, Literal, 
Murmur3Hash, NamedExpression, NaNvl, PosExplode, Round, SortOrder, StringSplit, 
StringTrim, TryEval, Uuid}
+import 
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, 
HLLAdapter}
 import org.apache.spark.sql.catalyst.optimizer.BuildSide
 import org.apache.spark.sql.catalyst.plans.JoinType
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
diff --git 
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/VeloxBloomFilterMightContain.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/expression/VeloxBloomFilterMightContain.scala
similarity index 94%
rename from 
backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/VeloxBloomFilterMightContain.scala
rename to 
backends-velox/src/main/scala/org/apache/gluten/expression/VeloxBloomFilterMightContain.scala
index 77885caea..8c2a7e492 100644
--- 
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/VeloxBloomFilterMightContain.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/expression/VeloxBloomFilterMightContain.scala
@@ -14,16 +14,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.sql.catalyst.expressions
+package org.apache.gluten.expression
+
 import org.apache.gluten.sql.shims.SparkShimLoader
+import org.apache.gluten.utils.VeloxBloomFilter
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
-import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
CodeGenerator, ExprCode, JavaCode, TrueLiteral}
+import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression}
+import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper
 import org.apache.spark.sql.types.DataType
 import org.apache.spark.util.TaskResources
-import org.apache.spark.util.sketch.VeloxBloomFilter
 
 /**
  * Velox's bloom-filter implementation uses different algorithms internally 
comparing to vanilla
diff --git 
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/VeloxBloomFilterAggregate.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala
similarity index 95%
rename from 
backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/VeloxBloomFilterAggregate.scala
rename to 
backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala
index da545aa47..d22cc7023 100644
--- 
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/VeloxBloomFilterAggregate.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala
@@ -14,18 +14,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.sql.catalyst.expressions.aggregate
+package org.apache.gluten.expression.aggregate
 
 import org.apache.gluten.sql.shims.SparkShimLoader
+import org.apache.gluten.utils.VeloxBloomFilter
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.expressions.Expression
+import 
org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate
 import org.apache.spark.sql.catalyst.trees.TernaryLike
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.DataType
 import org.apache.spark.util.TaskResources
-import org.apache.spark.util.sketch.{BloomFilter, VeloxBloomFilter}
+import org.apache.spark.util.sketch.BloomFilter
 
 /**
  * Velox's bloom-filter implementation uses different algorithms internally 
comparing to vanilla
diff --git 
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/BloomFilterMightContainJointRewriteRule.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/extension/BloomFilterMightContainJointRewriteRule.scala
similarity index 89%
rename from 
backends-velox/src/main/scala/org/apache/spark/sql/catalyst/BloomFilterMightContainJointRewriteRule.scala
rename to 
backends-velox/src/main/scala/org/apache/gluten/extension/BloomFilterMightContainJointRewriteRule.scala
index 7d15e32b3..c8cb4cca3 100644
--- 
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/BloomFilterMightContainJointRewriteRule.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/extension/BloomFilterMightContainJointRewriteRule.scala
@@ -14,14 +14,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.sql.catalyst
+package org.apache.gluten.extension
 
 import org.apache.gluten.GlutenConfig
+import org.apache.gluten.expression.VeloxBloomFilterMightContain
+import org.apache.gluten.expression.aggregate.VeloxBloomFilterAggregate
 import org.apache.gluten.sql.shims.SparkShimLoader
 
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.VeloxBloomFilterMightContain
-import 
org.apache.spark.sql.catalyst.expressions.aggregate.VeloxBloomFilterAggregate
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
 
diff --git 
a/backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java
 
b/backends-velox/src/test/java/org/apache/gluten/utils/VeloxBloomFilterTest.java
similarity index 97%
rename from 
backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java
rename to 
backends-velox/src/test/java/org/apache/gluten/utils/VeloxBloomFilterTest.java
index 0ebc5d4c5..db54bd783 100644
--- 
a/backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java
+++ 
b/backends-velox/src/test/java/org/apache/gluten/utils/VeloxBloomFilterTest.java
@@ -14,13 +14,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.util.sketch;
+package org.apache.gluten.utils;
 
 import org.apache.gluten.backendsapi.ListenerApi;
 import org.apache.gluten.backendsapi.velox.VeloxListenerApi;
 
 import org.apache.spark.SparkConf;
 import org.apache.spark.util.TaskResources$;
+import org.apache.spark.util.sketch.BloomFilter;
+import org.apache.spark.util.sketch.IncompatibleMergeException;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index a3c51f64a..b5ae96a04 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -141,7 +141,7 @@ 
Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeValidateWithFail
   JNI_METHOD_END(nullptr)
 }
 
-JNIEXPORT jlong JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_empty( // NOLINT
+JNIEXPORT jlong JNICALL 
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_empty( // NOLINT
     JNIEnv* env,
     jobject wrapper,
     jint capacity) {
@@ -154,7 +154,7 @@ JNIEXPORT jlong JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWra
   JNI_METHOD_END(gluten::kInvalidResourceHandle)
 }
 
-JNIEXPORT jlong JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_init( // NOLINT
+JNIEXPORT jlong JNICALL 
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_init( // NOLINT
     JNIEnv* env,
     jobject wrapper,
     jbyteArray data) {
@@ -169,7 +169,7 @@ JNIEXPORT jlong JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWra
   JNI_METHOD_END(gluten::kInvalidResourceHandle)
 }
 
-JNIEXPORT void JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_insertLong( // 
NOLINT
+JNIEXPORT void JNICALL 
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_insertLong( // NOLINT
     JNIEnv* env,
     jobject wrapper,
     jlong handle,
@@ -182,7 +182,7 @@ JNIEXPORT void JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrap
   JNI_METHOD_END()
 }
 
-JNIEXPORT jboolean JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_mightContainLong( 
// NOLINT
+JNIEXPORT jboolean JNICALL 
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_mightContainLong( // 
NOLINT
     JNIEnv* env,
     jobject wrapper,
     jlong handle,
@@ -207,7 +207,7 @@ static std::vector<char> 
serialize(BloomFilter<std::allocator<uint64_t>>* bf) {
 }
 } // namespace
 
-JNIEXPORT void JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_mergeFrom( // 
NOLINT
+JNIEXPORT void JNICALL 
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_mergeFrom( // NOLINT
     JNIEnv* env,
     jobject wrapper,
     jlong handle,
@@ -223,7 +223,7 @@ JNIEXPORT void JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrap
   JNI_METHOD_END()
 }
 
-JNIEXPORT jbyteArray JNICALL 
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_serialize( // 
NOLINT
+JNIEXPORT jbyteArray JNICALL 
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_serialize( // NOLINT
     JNIEnv* env,
     jobject wrapper,
     jlong handle) {
diff --git a/cpp/velox/symbols.map b/cpp/velox/symbols.map
index 42518c387..ebd2b9af0 100644
--- a/cpp/velox/symbols.map
+++ b/cpp/velox/symbols.map
@@ -5,7 +5,7 @@
         *facebook::velox::*;
     };
 
-    Java_org_apache_gluten*;
+    Java_org_apache_gluten_*;
   local:
     # Hide symbols of static dependencies
     *;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to