This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 48f481fcb [VL] CI: Add TPC-H / TPC-DS job at SF30 with Spark 3.4
(#5490)
48f481fcb is described below
commit 48f481fcbdab0758f7645d856f6266cb417da9e9
Author: Hongze Zhang <[email protected]>
AuthorDate: Wed Apr 24 14:27:52 2024 +0800
[VL] CI: Add TPC-H / TPC-DS job at SF30 with Spark 3.4 (#5490)
Add a CI job for SF30 with Spark 3.4. This makes it possible to catch
regressions that only show up at larger scale factors or on newer Spark
versions, e.g., issues related to SHJ or runtime filters.
---
.github/workflows/velox_docker.yml | 51 ++++++++++++++++++++++
.../sketch => gluten/utils}/VeloxBloomFilter.java | 4 +-
.../utils}/VeloxBloomFilterJniWrapper.java | 2 +-
.../backendsapi/velox/VeloxSparkPlanExecApi.scala | 8 ++--
.../expression}/VeloxBloomFilterMightContain.scala | 8 ++--
.../aggregate/VeloxBloomFilterAggregate.scala | 6 ++-
.../BloomFilterMightContainJointRewriteRule.scala | 6 +--
.../utils}/VeloxBloomFilterTest.java | 4 +-
cpp/velox/jni/VeloxJniWrapper.cc | 12 ++---
cpp/velox/symbols.map | 2 +-
10 files changed, 82 insertions(+), 21 deletions(-)
diff --git a/.github/workflows/velox_docker.yml
b/.github/workflows/velox_docker.yml
index 271daf679..42e102ba7 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -339,6 +339,57 @@ jobs:
--local --preset=velox --benchmark-type=ds --error-on-memleak
-s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1
\
--skip-data-gen --random-kill-tasks
+ run-tpc-test-ubuntu-sf30:
+ needs: build-native-lib
+ strategy:
+ fail-fast: false
+ matrix:
+ spark: [ "spark-3.4" ]
+ runs-on: ubuntu-20.04
+ steps:
+ - name: Maximize build disk space
+ shell: bash
+ run: |
+ df -h
+ set -euo pipefail
+ echo "Removing unwanted software... "
+ sudo rm -rf /usr/share/dotnet
+ sudo rm -rf /usr/local/lib/android
+ sudo rm -rf /opt/ghc
+ sudo rm -rf /opt/hostedtoolcache/CodeQL
+ sudo docker image prune --all --force > /dev/null
+ df -h
+ - uses: actions/checkout@v2
+ - name: Download All Artifacts
+ uses: actions/download-artifact@v2
+ with:
+ name: velox-native-lib-${{github.sha}}
+ path: ./cpp/build/releases
+ - name: Setup java and maven
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y openjdk-8-jdk maven
+ - name: Set environment variables
+ run: |
+ echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
+ - name: Build for Spark ${{ matrix.spark }}
+ run: |
+ cd $GITHUB_WORKSPACE/
+ mvn -ntp clean install -P${{ matrix.spark }} -Pbackends-velox
-DskipTests
+ cd $GITHUB_WORKSPACE/tools/gluten-it
+ mvn -ntp clean install -P${{ matrix.spark }}
+ GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local
--benchmark-type=h -s=30.0 --threads=12
+ GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local
--benchmark-type=ds -s=30.0 --threads=12
+ - name: TPC-H / TPC-DS SF30.0 Parquet local spark3.4
+ run: |
+ cd tools/gluten-it \
+ && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
+ --local --preset=velox --benchmark-type=h --error-on-memleak
-s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1
\
+ --skip-data-gen \
+ && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
+ --local --preset=velox --benchmark-type=ds --error-on-memleak
-s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1
\
+ --skip-data-gen
+
run-tpc-test-centos8-uniffle:
needs: build-native-lib
strategy:
diff --git
a/backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilter.java
b/backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilter.java
similarity index 96%
rename from
backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilter.java
rename to
backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilter.java
index 59716ed79..13ba8e011 100644
---
a/backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilter.java
+++ b/backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilter.java
@@ -14,9 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.spark.util.sketch;
+package org.apache.gluten.utils;
import org.apache.commons.io.IOUtils;
+import org.apache.spark.util.sketch.BloomFilter;
+import org.apache.spark.util.sketch.IncompatibleMergeException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
diff --git
a/backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilterJniWrapper.java
b/backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilterJniWrapper.java
similarity index 97%
rename from
backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilterJniWrapper.java
rename to
backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilterJniWrapper.java
index 572e2c7ac..94f8e17bc 100644
---
a/backends-velox/src/main/java/org/apache/spark/util/sketch/VeloxBloomFilterJniWrapper.java
+++
b/backends-velox/src/main/java/org/apache/gluten/utils/VeloxBloomFilterJniWrapper.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.spark.util.sketch;
+package org.apache.gluten.utils;
import org.apache.gluten.exec.Runtime;
import org.apache.gluten.exec.RuntimeAware;
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index c75a25e01..7463c6340 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -22,6 +22,8 @@ import org.apache.gluten.exception.GlutenNotSupportException
import org.apache.gluten.execution._
import org.apache.gluten.expression._
import org.apache.gluten.expression.ConverterUtils.FunctionConfig
+import org.apache.gluten.expression.aggregate.VeloxBloomFilterAggregate
+import org.apache.gluten.extension.BloomFilterMightContainJointRewriteRule
import org.apache.gluten.extension.columnar.TransformHints
import org.apache.gluten.sql.shims.SparkShimLoader
import org.apache.gluten.substrait.expression.{ExpressionBuilder,
ExpressionNode, IfThenNode}
@@ -33,12 +35,12 @@ import org.apache.spark.serializer.Serializer
import org.apache.spark.shuffle.{GenShuffleWriterParameters,
GlutenShuffleWriterWrapper}
import org.apache.spark.shuffle.utils.ShuffleUtil
import org.apache.spark.sql.{SparkSession, Strategy}
-import org.apache.spark.sql.catalyst.{AggregateFunctionRewriteRule,
BloomFilterMightContainJointRewriteRule, FlushableHashAggregateRule,
FunctionIdentifier}
+import org.apache.spark.sql.catalyst.{AggregateFunctionRewriteRule,
FlushableHashAggregateRule, FunctionIdentifier}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
-import org.apache.spark.sql.catalyst.expressions.{Add, Alias, ArrayExists,
ArrayFilter, ArrayForAll, ArrayTransform, Ascending, Attribute, Cast,
CreateNamedStruct, ElementAt, Expression, ExpressionInfo, Generator,
GetArrayItem, GetMapValue, GetStructField, If, IsNaN, LambdaFunction, Literal,
Murmur3Hash, NamedExpression, NaNvl, PosExplode, Round, SortOrder, StringSplit,
StringTrim, TryEval, Uuid, VeloxBloomFilterMightContain}
-import
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression,
HLLAdapter, VeloxBloomFilterAggregate}
+import org.apache.spark.sql.catalyst.expressions.{Add, Alias, ArrayExists,
ArrayFilter, ArrayForAll, ArrayTransform, Ascending, Attribute, Cast,
CreateNamedStruct, ElementAt, Expression, ExpressionInfo, Generator,
GetArrayItem, GetMapValue, GetStructField, If, IsNaN, LambdaFunction, Literal,
Murmur3Hash, NamedExpression, NaNvl, PosExplode, Round, SortOrder, StringSplit,
StringTrim, TryEval, Uuid}
+import
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression,
HLLAdapter}
import org.apache.spark.sql.catalyst.optimizer.BuildSide
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
diff --git
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/VeloxBloomFilterMightContain.scala
b/backends-velox/src/main/scala/org/apache/gluten/expression/VeloxBloomFilterMightContain.scala
similarity index 94%
rename from
backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/VeloxBloomFilterMightContain.scala
rename to
backends-velox/src/main/scala/org/apache/gluten/expression/VeloxBloomFilterMightContain.scala
index 77885caea..8c2a7e492 100644
---
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/VeloxBloomFilterMightContain.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/expression/VeloxBloomFilterMightContain.scala
@@ -14,16 +14,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.spark.sql.catalyst.expressions
+package org.apache.gluten.expression
+
import org.apache.gluten.sql.shims.SparkShimLoader
+import org.apache.gluten.utils.VeloxBloomFilter
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
-import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext,
CodeGenerator, ExprCode, JavaCode, TrueLiteral}
+import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression}
+import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper
import org.apache.spark.sql.types.DataType
import org.apache.spark.util.TaskResources
-import org.apache.spark.util.sketch.VeloxBloomFilter
/**
* Velox's bloom-filter implementation uses different algorithms internally
comparing to vanilla
diff --git
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/VeloxBloomFilterAggregate.scala
b/backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala
similarity index 95%
rename from
backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/VeloxBloomFilterAggregate.scala
rename to
backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala
index da545aa47..d22cc7023 100644
---
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/VeloxBloomFilterAggregate.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala
@@ -14,18 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.spark.sql.catalyst.expressions.aggregate
+package org.apache.gluten.expression.aggregate
import org.apache.gluten.sql.shims.SparkShimLoader
+import org.apache.gluten.utils.VeloxBloomFilter
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.expressions.Expression
+import
org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate
import org.apache.spark.sql.catalyst.trees.TernaryLike
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.DataType
import org.apache.spark.util.TaskResources
-import org.apache.spark.util.sketch.{BloomFilter, VeloxBloomFilter}
+import org.apache.spark.util.sketch.BloomFilter
/**
* Velox's bloom-filter implementation uses different algorithms internally
comparing to vanilla
diff --git
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/BloomFilterMightContainJointRewriteRule.scala
b/backends-velox/src/main/scala/org/apache/gluten/extension/BloomFilterMightContainJointRewriteRule.scala
similarity index 89%
rename from
backends-velox/src/main/scala/org/apache/spark/sql/catalyst/BloomFilterMightContainJointRewriteRule.scala
rename to
backends-velox/src/main/scala/org/apache/gluten/extension/BloomFilterMightContainJointRewriteRule.scala
index 7d15e32b3..c8cb4cca3 100644
---
a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/BloomFilterMightContainJointRewriteRule.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/extension/BloomFilterMightContainJointRewriteRule.scala
@@ -14,14 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.spark.sql.catalyst
+package org.apache.gluten.extension
import org.apache.gluten.GlutenConfig
+import org.apache.gluten.expression.VeloxBloomFilterMightContain
+import org.apache.gluten.expression.aggregate.VeloxBloomFilterAggregate
import org.apache.gluten.sql.shims.SparkShimLoader
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.VeloxBloomFilterMightContain
-import
org.apache.spark.sql.catalyst.expressions.aggregate.VeloxBloomFilterAggregate
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
diff --git
a/backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java
b/backends-velox/src/test/java/org/apache/gluten/utils/VeloxBloomFilterTest.java
similarity index 97%
rename from
backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java
rename to
backends-velox/src/test/java/org/apache/gluten/utils/VeloxBloomFilterTest.java
index 0ebc5d4c5..db54bd783 100644
---
a/backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java
+++
b/backends-velox/src/test/java/org/apache/gluten/utils/VeloxBloomFilterTest.java
@@ -14,13 +14,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.spark.util.sketch;
+package org.apache.gluten.utils;
import org.apache.gluten.backendsapi.ListenerApi;
import org.apache.gluten.backendsapi.velox.VeloxListenerApi;
import org.apache.spark.SparkConf;
import org.apache.spark.util.TaskResources$;
+import org.apache.spark.util.sketch.BloomFilter;
+import org.apache.spark.util.sketch.IncompatibleMergeException;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index a3c51f64a..b5ae96a04 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -141,7 +141,7 @@
Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeValidateWithFail
JNI_METHOD_END(nullptr)
}
-JNIEXPORT jlong JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_empty( // NOLINT
+JNIEXPORT jlong JNICALL
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_empty( // NOLINT
JNIEnv* env,
jobject wrapper,
jint capacity) {
@@ -154,7 +154,7 @@ JNIEXPORT jlong JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWra
JNI_METHOD_END(gluten::kInvalidResourceHandle)
}
-JNIEXPORT jlong JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_init( // NOLINT
+JNIEXPORT jlong JNICALL
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_init( // NOLINT
JNIEnv* env,
jobject wrapper,
jbyteArray data) {
@@ -169,7 +169,7 @@ JNIEXPORT jlong JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWra
JNI_METHOD_END(gluten::kInvalidResourceHandle)
}
-JNIEXPORT void JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_insertLong( //
NOLINT
+JNIEXPORT void JNICALL
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_insertLong( // NOLINT
JNIEnv* env,
jobject wrapper,
jlong handle,
@@ -182,7 +182,7 @@ JNIEXPORT void JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrap
JNI_METHOD_END()
}
-JNIEXPORT jboolean JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_mightContainLong(
// NOLINT
+JNIEXPORT jboolean JNICALL
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_mightContainLong( //
NOLINT
JNIEnv* env,
jobject wrapper,
jlong handle,
@@ -207,7 +207,7 @@ static std::vector<char>
serialize(BloomFilter<std::allocator<uint64_t>>* bf) {
}
} // namespace
-JNIEXPORT void JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_mergeFrom( //
NOLINT
+JNIEXPORT void JNICALL
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_mergeFrom( // NOLINT
JNIEnv* env,
jobject wrapper,
jlong handle,
@@ -223,7 +223,7 @@ JNIEXPORT void JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrap
JNI_METHOD_END()
}
-JNIEXPORT jbyteArray JNICALL
Java_org_apache_spark_util_sketch_VeloxBloomFilterJniWrapper_serialize( //
NOLINT
+JNIEXPORT jbyteArray JNICALL
Java_org_apache_gluten_utils_VeloxBloomFilterJniWrapper_serialize( // NOLINT
JNIEnv* env,
jobject wrapper,
jlong handle) {
diff --git a/cpp/velox/symbols.map b/cpp/velox/symbols.map
index 42518c387..ebd2b9af0 100644
--- a/cpp/velox/symbols.map
+++ b/cpp/velox/symbols.map
@@ -5,7 +5,7 @@
*facebook::velox::*;
};
- Java_org_apache_gluten*;
+ Java_org_apache_gluten_*;
local:
# Hide symbols of static dependencies
*;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]