This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new aee298dd9 feat: add sort_array benchmark (#3758)
aee298dd9 is described below
commit aee298dd932f363e8f536ec80e2aefcf2fc7fddd
Author: Han-Wen Tsao <[email protected]>
AuthorDate: Mon Mar 23 03:09:04 2026 +0800
feat: add sort_array benchmark (#3758)
---
.../benchmark/CometArrayExpressionBenchmark.scala | 108 +++++++++++++++++++++
1 file changed, 108 insertions(+)
diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArrayExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArrayExpressionBenchmark.scala
new file mode 100644
index 000000000..784923ca2
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArrayExpressionBenchmark.scala
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.benchmark
+
+/**
+ * Benchmark to measure performance of Comet array expressions. To run this benchmark:
+ * {{{
+ * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometArrayExpressionBenchmark
+ * }}}
+ * Results will be written to "spark/benchmarks/CometArrayExpressionBenchmark-**results.txt".
+ */
+object CometArrayExpressionBenchmark extends CometBenchmarkBase {
+
+  /**
+   * Builds a SQL `array(...)` expression with `width` INT elements, each derived from the
+   * `value` column.
+   *
+   * Element `i` is `CAST((value * seed + i) % modulus AS INT)` with `seed = 13 + i * 17`.
+   * Every 11th element (`i = 0, 11, 22, ...`) is additionally wrapped in a `CASE` that yields
+   * `NULL` when `value % 32 = 0`, so the generated arrays can contain nulls.
+   *
+   * @param width
+   *   number of array elements; must be positive
+   * @param modulus
+   *   divisor applied to every element before the cast
+   * @return
+   *   the SQL text of the array constructor
+   * @throws IllegalArgumentException
+   *   if `width` is not positive
+   */
+  private def buildWideIntArrayExpr(width: Int, modulus: Int): String = {
+    require(width > 0, "width must be positive")
+
+    (0 until width)
+      .map { i =>
+        val seed = 13 + i * 17
+        // Built once and reused so the nullable and non-nullable variants cannot drift apart.
+        val element = s"CAST((value * $seed + $i) % $modulus AS INT)"
+        if (i % 11 == 0) {
+          s"CASE WHEN value % 32 = 0 THEN NULL ELSE $element END"
+        } else {
+          element
+        }
+      }
+      .mkString("array(", ",\n ", ")")
+  }
+
+  /**
+   * Evaluates `f` with a table named `parquetV1Table` that exposes a single `int_arr` column
+   * of `width`-element integer arrays selected from `$tbl`.
+   *
+   * NOTE(review): `withTempPath`, `withTempTable`, `prepareTable`, `spark` and `tbl` are not
+   * defined in this file (presumably inherited from `CometBenchmarkBase`); `prepareTable` is
+   * assumed to materialise the DataFrame under `dir` and register it as `parquetV1Table` --
+   * confirm against the base class.
+   *
+   * @param width
+   *   number of elements per generated array; the modulus used is `width * 32`
+   * @param f
+   *   body to run while the table is available
+   */
+  private def prepareSortArrayTable(width: Int)(f: => Unit): Unit = {
+    val intArrayExpr = buildWideIntArrayExpr(width, modulus = width * 32)
+    withTempPath { dir =>
+      withTempTable("parquetV1Table") {
+        prepareTable(
+          dir,
+          spark.sql(s"""
+            SELECT
+              $intArrayExpr AS int_arr
+            FROM $tbl
+          """))
+        f
+      }
+    }
+  }
+
+  /**
+   * Shared scaffolding for the benchmarks below: prepares the array table for `width` and
+   * hands `name`, `values` and `query` to `runExpressionBenchmark`.
+   */
+  private def runSortArrayBenchmark(
+      name: String,
+      values: Int,
+      width: Int,
+      query: String): Unit = {
+    prepareSortArrayTable(width) {
+      runExpressionBenchmark(name, values, query)
+    }
+  }
+
+  /**
+   * Benchmarks `sort_array(int_arr)` (the default, ascending form) over `width`-element arrays.
+   *
+   * @param values
+   *   value forwarded to `runExpressionBenchmark` (presumably the row count -- confirm)
+   * @param width
+   *   number of elements per array
+   */
+  def sortArrayIntAscBenchmark(values: Int, width: Int): Unit =
+    runSortArrayBenchmark(
+      s"sort_array int ascending (width=$width)",
+      values,
+      width,
+      "SELECT sort_array(int_arr) FROM parquetV1Table")
+
+  /**
+   * Benchmarks `sort_array(int_arr, false)` (descending) over `width`-element arrays.
+   *
+   * @param values
+   *   value forwarded to `runExpressionBenchmark` (presumably the row count -- confirm)
+   * @param width
+   *   number of elements per array
+   */
+  def sortArrayIntDescBenchmark(values: Int, width: Int): Unit =
+    runSortArrayBenchmark(
+      s"sort_array int descending (width=$width)",
+      values,
+      width,
+      "SELECT sort_array(int_arr, false) FROM parquetV1Table")
+
+  /**
+   * Benchmarks `element_at(sort_array(int_arr), 1)`, i.e. sorting and then reading only the
+   * first element of the sorted array.
+   *
+   * @param values
+   *   value forwarded to `runExpressionBenchmark` (presumably the row count -- confirm)
+   * @param width
+   *   number of elements per array
+   */
+  def sortArrayIntAscFirstElementBenchmark(values: Int, width: Int): Unit =
+    runSortArrayBenchmark(
+      s"element_at(sort_array(int_arr), 1) (width=$width)",
+      values,
+      width,
+      "SELECT element_at(sort_array(int_arr), 1) FROM parquetV1Table")
+
+  /**
+   * Entry point: runs the four sort_array benchmarks, each over `4 * 1024 * 1024` values, with
+   * array widths of 16 and 32.
+   *
+   * @param mainArgs
+   *   command-line arguments; not used here
+   */
+  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
+    val values = 4 * 1024 * 1024
+
+    runBenchmarkWithTable("sortArrayIntAsc", values) { v =>
+      sortArrayIntAscBenchmark(v, width = 16)
+    }
+
+    runBenchmarkWithTable("sortArrayIntDesc", values) { v =>
+      sortArrayIntDescBenchmark(v, width = 16)
+    }
+
+    runBenchmarkWithTable("sortArrayIntAscWide", values) { v =>
+      sortArrayIntAscBenchmark(v, width = 32)
+    }
+
+    runBenchmarkWithTable("sortArrayIntAscFirstElement", values) { v =>
+      sortArrayIntAscFirstElementBenchmark(v, width = 32)
+    }
+  }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]