This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new c2ce72ab0 feat: Create microbenchmarks for Comet `cast` expression (#2932)
c2ce72ab0 is described below
commit c2ce72ab054f71c80fddfa6da41d3be6c800bd88
Author: B Vadlamani <[email protected]>
AuthorDate: Mon Dec 22 08:48:53 2025 -0800
feat: Create microbenchmarks for Comet `cast` expression (#2932)
---
.../spark/sql/benchmark/CometCastBenchmark.scala | 128 +++++++++++++++++++++
1 file changed, 128 insertions(+)
diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastBenchmark.scala
new file mode 100644
index 000000000..b2212dfd0
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastBenchmark.scala
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.benchmark
+
+import scala.util.Try
+
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DataType, LongType}
+
+import org.apache.comet.CometConf
+import org.apache.comet.expressions.{CometCast, CometEvalMode}
+import org.apache.comet.serde.{Compatible, Incompatible, Unsupported}
+
+/**
+ * Benchmark to measure the performance of Comet's `cast` expression. To run this benchmark:
+ * {{{
+ * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometCastBenchmark
+ * }}}
+ *
+ * Results will be written to "spark/benchmarks/CometCastBenchmark-**results.txt".
+ */
+object CometCastBenchmark extends CometBenchmarkBase {
+
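+ // Tune the session for benchmarking: disable Parquet dictionary encoding and reduce shuffle partitions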
+ override def getSparkSession: SparkSession = {
+ val session = super.getSparkSession
+ session.conf.set("parquet.enable.dictionary", "false")
+ session.conf.set("spark.sql.shuffle.partitions", "2")
+ session
+ }
+
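+ // e.g. castExprSQL(LongType, "value") returns "CAST(value AS BIGINT)"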
+ def castExprSQL(toDataType: DataType, input: String): String = {
+ s"CAST ($input AS ${toDataType.sql})"
+ }
+
+ override def runCometBenchmark(args: Array[String]): Unit = {
+
+ // TODO: benchmark all supported input data types; only Long inputs are covered for now
+ CometCast.supportedTypes.foreach { toDataType =>
+ Seq(false, true).foreach { ansiMode =>
+ CometCast.isSupported(
+ LongType,
+ toDataType,
+ None,
+ if (ansiMode) CometEvalMode.ANSI else CometEvalMode.LEGACY) match {
+ case Compatible(_) =>
+ runBenchmarkWithTable(
+ s"Running benchmark cast operation from : $LongType to :
$toDataType",
+ 1024 * 1024 * 10) { v =>
+ castBenchmark(v, LongType, toDataType, isAnsiMode = ansiMode)
+ }
+ case Incompatible(_) => () // skip: Comet's behavior differs from Spark's for this cast
+ case Unsupported(_) => () // skip: Comet does not support this cast
+ }
+ }
+ }
+ }
+
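+ /** Benchmarks a single cast, comparing Spark and Comet over the same Parquet-backed table. */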
+ def castBenchmark(
+ values: Int,
+ fromDataType: DataType,
+ toDataType: DataType,
+ isAnsiMode: Boolean): Unit = {
+
+ val benchmark =
+ new Benchmark(
+ s"Cast function to : ${toDataType} , ansi mode enabled :
${isAnsiMode}",
+ values,
+ output = output)
+
+ withTempPath { dir =>
+ withTempTable("parquetV1Table") {
+ prepareTable(dir, spark.sql(s"SELECT value FROM $tbl"))
+ val functionSQL = castExprSQL(toDataType, "value")
+ val query = s"SELECT $functionSQL FROM parquetV1Table"
+
+ benchmark.addCase(
+ s"SQL Parquet - Spark Cast expr from ${fromDataType.sql} to :
${toDataType.sql} , " +
+ s"ansi mode enabled : ${isAnsiMode}") { _ =>
+ withSQLConf(SQLConf.ANSI_ENABLED.key -> isAnsiMode.toString) {
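+ // ANSI-mode casts can throw on invalid input; Try prevents a failure from aborting the benchmark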
+ if (isAnsiMode) {
+ Try { spark.sql(query).noop() }
+ } else {
+ spark.sql(query).noop()
+ }
+ }
+ }
+
+ benchmark.addCase(
+ s"SQL Parquet - Comet Cast expr from ${fromDataType.sql} to :
${toDataType.sql} , " +
+ s"ansi mode enabled : ${isAnsiMode}") { _ =>
+ withSQLConf(
+ CometConf.COMET_ENABLED.key -> "true",
+ CometConf.COMET_EXEC_ENABLED.key -> "true",
+ SQLConf.ANSI_ENABLED.key -> isAnsiMode.toString) {
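+ // As above, guard against ANSI-mode failures so the Comet case always completes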
+ if (isAnsiMode) {
+ Try { spark.sql(query).noop() }
+ } else {
+ spark.sql(query).noop()
+ }
+ }
+ }
+ benchmark.run()
+ }
+ }
+
+ }
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]