This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new aee298dd9 feat: add sort_array benchmark (#3758)
aee298dd9 is described below

commit aee298dd932f363e8f536ec80e2aefcf2fc7fddd
Author: Han-Wen Tsao <[email protected]>
AuthorDate: Mon Mar 23 03:09:04 2026 +0800

    feat: add sort_array benchmark (#3758)
---
 .../benchmark/CometArrayExpressionBenchmark.scala  | 108 +++++++++++++++++++++
 1 file changed, 108 insertions(+)

diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArrayExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArrayExpressionBenchmark.scala
new file mode 100644
index 000000000..784923ca2
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArrayExpressionBenchmark.scala
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.benchmark
+
+/**
+ * Benchmark to measure performance of Comet array expressions. To run this benchmark:
+ * {{{
+ *   SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometArrayExpressionBenchmark
+ * }}}
+ * Results will be written to "spark/benchmarks/CometArrayExpressionBenchmark-**results.txt".
+ *
+ * The cases in this object exercise `sort_array` over INT arrays whose elements are derived
+ * from the `value` column of the source table and which contain occasional NULL elements.
+ */
+object CometArrayExpressionBenchmark extends CometBenchmarkBase {
+
+  /**
+   * Builds the SQL text of an `array(...)` expression with `width` INT elements.
+   *
+   * Element `i` is `CAST((value * (13 + i * 17) + i) % modulus AS INT)`, so every position
+   * uses a different multiplier and offset. Positions where `i % 11 == 0` are additionally
+   * wrapped in a `CASE` that yields NULL for rows where `value % 32 = 0`, which puts NULL
+   * elements into the generated arrays for those rows.
+   *
+   * @param width
+   *   number of array elements; must be positive
+   * @param modulus
+   *   divisor applied to every element; must be positive
+   * @return
+   *   the `array(...)` expression as a SQL string
+   * @throws IllegalArgumentException
+   *   if `width` or `modulus` is not positive
+   */
+  private def buildWideIntArrayExpr(width: Int, modulus: Int): String = {
+    require(width > 0, "width must be positive")
+    require(modulus > 0, "modulus must be positive")
+
+    (0 until width)
+      .map { i =>
+        // Per-position multiplier so the elements of one array are not all equal.
+        val seed = 13 + i * 17
+        if (i % 11 == 0) {
+          s"CASE WHEN value % 32 = 0 THEN NULL ELSE CAST((value * $seed + $i) % $modulus AS INT) END"
+        } else {
+          s"CAST((value * $seed + $i) % $modulus AS INT)"
+        }
+      }
+      // The separator's indentation only affects how the generated SQL text is laid out.
+      .mkString("array(", ",\n                ", ")")
+  }
+
+  /**
+   * Prepares the benchmark input and runs `f` while it is available.
+   *
+   * The input is a single-column query (`int_arr`) over the `tbl` source table, where
+   * `int_arr` comes from [[buildWideIntArrayExpr]] with `modulus = width * 32`. The query is
+   * handed to `prepareTable` inside `withTempPath` / `withTempTable("parquetV1Table")`, and
+   * `f` is evaluated inside those scopes.
+   *
+   * NOTE(review): `prepareTable` is defined outside this file; it presumably writes the
+   * DataFrame under `dir` and exposes it as `parquetV1Table` -- confirm in
+   * CometBenchmarkBase.
+   *
+   * @param width
+   *   number of elements in each generated array
+   * @param f
+   *   benchmark body, evaluated after the table has been prepared
+   */
+  private def prepareSortArrayTable(width: Int)(f: => Unit): Unit = {
+    val intArrayExpr = buildWideIntArrayExpr(width, modulus = width * 32)
+    withTempPath { dir =>
+      withTempTable("parquetV1Table") {
+        prepareTable(
+          dir,
+          spark.sql(s"""
+            SELECT
+              $intArrayExpr AS int_arr
+            FROM $tbl
+          """))
+        f
+      }
+    }
+  }
+
+  /**
+   * Benchmarks `sort_array(int_arr)` (default sort order) on arrays of `width` elements.
+   *
+   * @param values
+   *   value count forwarded to `runExpressionBenchmark`
+   * @param width
+   *   number of elements in each generated array
+   */
+  def sortArrayIntAscBenchmark(values: Int, width: Int): Unit = {
+    prepareSortArrayTable(width) {
+      runExpressionBenchmark(
+        s"sort_array int ascending (width=$width)",
+        values,
+        "SELECT sort_array(int_arr) FROM parquetV1Table")
+    }
+  }
+
+  /**
+   * Benchmarks `sort_array(int_arr, false)` on arrays of `width` elements.
+   *
+   * @param values
+   *   value count forwarded to `runExpressionBenchmark`
+   * @param width
+   *   number of elements in each generated array
+   */
+  def sortArrayIntDescBenchmark(values: Int, width: Int): Unit = {
+    prepareSortArrayTable(width) {
+      runExpressionBenchmark(
+        s"sort_array int descending (width=$width)",
+        values,
+        "SELECT sort_array(int_arr, false) FROM parquetV1Table")
+    }
+  }
+
+  /**
+   * Benchmarks `element_at(sort_array(int_arr), 1)`, i.e. sorting followed by extracting a
+   * single element instead of returning the whole sorted array.
+   *
+   * @param values
+   *   value count forwarded to `runExpressionBenchmark`
+   * @param width
+   *   number of elements in each generated array
+   */
+  def sortArrayIntAscFirstElementBenchmark(values: Int, width: Int): Unit = {
+    prepareSortArrayTable(width) {
+      runExpressionBenchmark(
+        s"element_at(sort_array(int_arr), 1) (width=$width)",
+        values,
+        "SELECT element_at(sort_array(int_arr), 1) FROM parquetV1Table")
+    }
+  }
+
+  /**
+   * Runs every benchmark case defined in this object, each inside its own
+   * `runBenchmarkWithTable` call with `4 * 1024 * 1024` as the value count.
+   *
+   * @param mainArgs
+   *   command-line arguments; not used by this benchmark
+   */
+  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
+    val values = 4 * 1024 * 1024
+
+    runBenchmarkWithTable("sortArrayIntAsc", values) { v =>
+      sortArrayIntAscBenchmark(v, width = 16)
+    }
+
+    runBenchmarkWithTable("sortArrayIntDesc", values) { v =>
+      sortArrayIntDescBenchmark(v, width = 16)
+    }
+
+    runBenchmarkWithTable("sortArrayIntAscWide", values) { v =>
+      sortArrayIntAscBenchmark(v, width = 32)
+    }
+
+    runBenchmarkWithTable("sortArrayIntAscFirstElement", values) { v =>
+      sortArrayIntAscFirstElementBenchmark(v, width = 32)
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to