This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 68f127b75 feat: add partial support for date_format expression (#3201)
68f127b75 is described below

commit 68f127b75373eafda882512f13d255cf4256c520
Author: Andy Grove <[email protected]>
AuthorDate: Sun Jan 18 21:24:55 2026 -0700

    feat: add partial support for date_format expression (#3201)
---
 docs/source/user-guide/latest/configs.md           |   1 +
 .../org/apache/comet/serde/QueryPlanSerde.scala    |   1 +
 .../scala/org/apache/comet/serde/datetime.scala    | 104 +++++++++++++++++-
 .../comet/CometTemporalExpressionSuite.scala       | 120 ++++++++++++++++++++-
 4 files changed, 223 insertions(+), 3 deletions(-)

diff --git a/docs/source/user-guide/latest/configs.md 
b/docs/source/user-guide/latest/configs.md
index 53ed18910..5eea5c4e5 100644
--- a/docs/source/user-guide/latest/configs.md
+++ b/docs/source/user-guide/latest/configs.md
@@ -234,6 +234,7 @@ These settings can be used to determine which parts of the 
plan are accelerated
 | `spark.comet.expression.CreateArray.enabled` | Enable Comet acceleration for 
`CreateArray` | true |
 | `spark.comet.expression.CreateNamedStruct.enabled` | Enable Comet 
acceleration for `CreateNamedStruct` | true |
 | `spark.comet.expression.DateAdd.enabled` | Enable Comet acceleration for 
`DateAdd` | true |
+| `spark.comet.expression.DateFormatClass.enabled` | Enable Comet acceleration 
for `DateFormatClass` | true |
 | `spark.comet.expression.DateSub.enabled` | Enable Comet acceleration for 
`DateSub` | true |
 | `spark.comet.expression.DayOfMonth.enabled` | Enable Comet acceleration for 
`DayOfMonth` | true |
 | `spark.comet.expression.DayOfWeek.enabled` | Enable Comet acceleration for 
`DayOfWeek` | true |
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala 
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index 2849aa6d3..3569559df 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -185,6 +185,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
 
   private val temporalExpressions: Map[Class[_ <: Expression], 
CometExpressionSerde[_]] = Map(
     classOf[DateAdd] -> CometDateAdd,
+    classOf[DateFormatClass] -> CometDateFormat,
     classOf[DateSub] -> CometDateSub,
     classOf[UnixDate] -> CometUnixDate,
     classOf[FromUnixTime] -> CometFromUnixTime,
diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala 
b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
index 252f52478..b191e8721 100644
--- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
@@ -21,8 +21,8 @@ package org.apache.comet.serde
 
 import java.util.Locale
 
-import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateSub, 
DayOfMonth, DayOfWeek, DayOfYear, GetDateField, Hour, Literal, Minute, Month, 
Quarter, Second, TruncDate, TruncTimestamp, UnixDate, WeekDay, WeekOfYear, Year}
-import org.apache.spark.sql.types.{DateType, IntegerType}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, 
DateFormatClass, DateSub, DayOfMonth, DayOfWeek, DayOfYear, GetDateField, Hour, 
Literal, Minute, Month, Quarter, Second, TruncDate, TruncTimestamp, UnixDate, 
WeekDay, WeekOfYear, Year}
+import org.apache.spark.sql.types.{DateType, IntegerType, StringType}
 import org.apache.spark.unsafe.types.UTF8String
 
 import org.apache.comet.CometSparkSessionExtensions.withInfo
@@ -381,3 +381,103 @@ object CometTruncTimestamp extends 
CometExpressionSerde[TruncTimestamp] {
     }
   }
 }
+
+/**
+ * Converts Spark DateFormatClass expression to DataFusion's to_char function.
+ *
+ * Spark uses Java SimpleDateFormat patterns while DataFusion uses strftime 
patterns. This
+ * implementation supports a whitelist of common format strings that can be 
reliably mapped
+ * between the two systems.
+ */
+object CometDateFormat extends CometExpressionSerde[DateFormatClass] {
+
+  /**
+   * Mapping from Spark SimpleDateFormat patterns to strftime patterns. Only 
formats in this map
+   * are supported.
+   */
+  val supportedFormats: Map[String, String] = Map(
+    // Full date formats
+    "yyyy-MM-dd" -> "%Y-%m-%d",
+    "yyyy/MM/dd" -> "%Y/%m/%d",
+    "yyyy-MM-dd HH:mm:ss" -> "%Y-%m-%d %H:%M:%S",
+    "yyyy/MM/dd HH:mm:ss" -> "%Y/%m/%d %H:%M:%S",
+    // Date components
+    "yyyy" -> "%Y",
+    "yy" -> "%y",
+    "MM" -> "%m",
+    "dd" -> "%d",
+    // Time formats
+    "HH:mm:ss" -> "%H:%M:%S",
+    "HH:mm" -> "%H:%M",
+    "HH" -> "%H",
+    "mm" -> "%M",
+    "ss" -> "%S",
+    // Combined formats
+    "yyyyMMdd" -> "%Y%m%d",
+    "yyyyMM" -> "%Y%m",
+    // Month and day names
+    "EEEE" -> "%A",
+    "EEE" -> "%a",
+    "MMMM" -> "%B",
+    "MMM" -> "%b",
+    // 12-hour time
+    "hh:mm:ss a" -> "%I:%M:%S %p",
+    "hh:mm a" -> "%I:%M %p",
+    "h:mm a" -> "%-I:%M %p",
+    // ISO formats
+    "yyyy-MM-dd'T'HH:mm:ss" -> "%Y-%m-%dT%H:%M:%S")
+
+  override def getSupportLevel(expr: DateFormatClass): SupportLevel = {
+    // Check timezone - only UTC is fully compatible
+    val timezone = expr.timeZoneId.getOrElse("UTC")
+    val isUtc = timezone == "UTC" || timezone == "Etc/UTC"
+
+    expr.right match {
+      case Literal(fmt: UTF8String, _) =>
+        val format = fmt.toString
+        if (supportedFormats.contains(format)) {
+          if (isUtc) {
+            Compatible()
+          } else {
+            Incompatible(Some(s"Non-UTC timezone '$timezone' may produce 
different results"))
+          }
+        } else {
+          Unsupported(
+            Some(
+              s"Format '$format' is not supported. Supported formats: " +
+                supportedFormats.keys.mkString(", ")))
+        }
+      case _ =>
+        Unsupported(Some("Only literal format strings are supported"))
+    }
+  }
+
+  override def convert(
+      expr: DateFormatClass,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    // Get the format string - must be a literal for us to map it
+    val strftimeFormat = expr.right match {
+      case Literal(fmt: UTF8String, _) =>
+        supportedFormats.get(fmt.toString)
+      case _ => None
+    }
+
+    strftimeFormat match {
+      case Some(format) =>
+        val childExpr = exprToProtoInternal(expr.left, inputs, binding)
+        val formatExpr = exprToProtoInternal(Literal(format), inputs, binding)
+
+        val optExpr = scalarFunctionExprToProtoWithReturnType(
+          "to_char",
+          StringType,
+          false,
+          childExpr,
+          formatExpr)
+        optExprWithInfo(optExpr, expr, expr.left, expr.right)
+      case None =>
+        withInfo(expr, expr.left, expr.right)
+        None
+    }
+  }
+}
diff --git 
a/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala 
b/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
index 3ab525ab6..35976ffa9 100644
--- a/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
@@ -26,7 +26,7 @@ import 
org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{DataTypes, StructField, StructType}
 
-import org.apache.comet.serde.{CometTruncDate, CometTruncTimestamp}
+import org.apache.comet.serde.{CometDateFormat, CometTruncDate, 
CometTruncTimestamp}
 import org.apache.comet.testing.{DataGenOptions, FuzzDataGenerator}
 
 class CometTemporalExpressionSuite extends CometTestBase with 
AdaptiveSparkPlanHelper {
@@ -123,6 +123,124 @@ class CometTemporalExpressionSuite extends CometTestBase 
with AdaptiveSparkPlanH
     FuzzDataGenerator.generateDataFrame(r, spark, schema, 1000, 
DataGenOptions())
   }
 
+  test("date_format with timestamp column") {
+    // Filter out formats with embedded quotes that need special handling
+    val supportedFormats = CometDateFormat.supportedFormats.keys.toSeq
+      .filterNot(_.contains("'"))
+
+    createTimestampTestData.createOrReplaceTempView("tbl")
+
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      for (format <- supportedFormats) {
+        checkSparkAnswerAndOperator(s"SELECT c0, date_format(c0, '$format') 
from tbl order by c0")
+      }
+      // Test ISO format with embedded quotes separately using double-quoted 
string
+      checkSparkAnswerAndOperator(
+        "SELECT c0, date_format(c0, \"yyyy-MM-dd'T'HH:mm:ss\") from tbl order 
by c0")
+    }
+  }
+
+  test("date_format with specific format strings") {
+    // Test specific format strings with explicit timestamp data
+    createTimestampTestData.createOrReplaceTempView("tbl")
+
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      // Date formats
+      checkSparkAnswerAndOperator("SELECT c0, date_format(c0, 'yyyy-MM-dd') 
from tbl order by c0")
+      checkSparkAnswerAndOperator("SELECT c0, date_format(c0, 'yyyy/MM/dd') 
from tbl order by c0")
+
+      // Time formats
+      checkSparkAnswerAndOperator("SELECT c0, date_format(c0, 'HH:mm:ss') from 
tbl order by c0")
+      checkSparkAnswerAndOperator("SELECT c0, date_format(c0, 'HH:mm') from 
tbl order by c0")
+
+      // Combined formats
+      checkSparkAnswerAndOperator(
+        "SELECT c0, date_format(c0, 'yyyy-MM-dd HH:mm:ss') from tbl order by 
c0")
+
+      // Day/month names
+      checkSparkAnswerAndOperator("SELECT c0, date_format(c0, 'EEEE') from tbl 
order by c0")
+      checkSparkAnswerAndOperator("SELECT c0, date_format(c0, 'MMMM') from tbl 
order by c0")
+
+      // 12-hour time
+      checkSparkAnswerAndOperator("SELECT c0, date_format(c0, 'hh:mm:ss a') 
from tbl order by c0")
+
+      // ISO format (use a double-quoted SQL string so the single-quoted 'T' survives)
+      checkSparkAnswerAndOperator(
+        "SELECT c0, date_format(c0, \"yyyy-MM-dd'T'HH:mm:ss\") from tbl order 
by c0")
+    }
+  }
+
+  test("date_format with literal timestamp") {
+    // Test specific literal timestamp formats
+    // Disable constant folding to ensure Comet actually executes the 
expression
+    withSQLConf(
+      SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC",
+      SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
+        "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") {
+      checkSparkAnswerAndOperator(
+        "SELECT date_format(TIMESTAMP '2024-03-15 14:30:45', 'yyyy-MM-dd')")
+      checkSparkAnswerAndOperator(
+        "SELECT date_format(TIMESTAMP '2024-03-15 14:30:45', 'yyyy-MM-dd 
HH:mm:ss')")
+      checkSparkAnswerAndOperator(
+        "SELECT date_format(TIMESTAMP '2024-03-15 14:30:45', 'HH:mm:ss')")
+      checkSparkAnswerAndOperator("SELECT date_format(TIMESTAMP '2024-03-15 
14:30:45', 'EEEE')")
+      checkSparkAnswerAndOperator(
+        "SELECT date_format(TIMESTAMP '2024-03-15 14:30:45', 'hh:mm:ss a')")
+    }
+  }
+
+  test("date_format with null") {
+    withSQLConf(
+      SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC",
+      SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
+        "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") {
+      checkSparkAnswerAndOperator("SELECT date_format(CAST(NULL AS TIMESTAMP), 
'yyyy-MM-dd')")
+    }
+  }
+
+  test("date_format unsupported format falls back to Spark") {
+    createTimestampTestData.createOrReplaceTempView("tbl")
+
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      // Unsupported format string
+      checkSparkAnswerAndFallbackReason(
+        "SELECT c0, date_format(c0, 'yyyy-MM-dd EEEE') from tbl order by c0",
+        "Format 'yyyy-MM-dd EEEE' is not supported")
+    }
+  }
+
+  test("date_format with non-UTC timezone falls back to Spark") {
+    createTimestampTestData.createOrReplaceTempView("tbl")
+
+    val nonUtcTimezones =
+      Seq("America/New_York", "America/Los_Angeles", "Europe/London", 
"Asia/Tokyo")
+
+    for (tz <- nonUtcTimezones) {
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz) {
+        // Non-UTC timezones should fall back to Spark as Incompatible
+        checkSparkAnswerAndFallbackReason(
+          "SELECT c0, date_format(c0, 'yyyy-MM-dd HH:mm:ss') from tbl order by 
c0",
+          s"Non-UTC timezone '$tz' may produce different results")
+      }
+    }
+  }
+
+  test("date_format with non-UTC timezone works when allowIncompatible is 
enabled") {
+    createTimestampTestData.createOrReplaceTempView("tbl")
+
+    val nonUtcTimezones = Seq("America/New_York", "Europe/London", 
"Asia/Tokyo")
+
+    for (tz <- nonUtcTimezones) {
+      withSQLConf(
+        SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz,
+        "spark.comet.expression.DateFormatClass.allowIncompatible" -> "true") {
+        // With allowIncompatible enabled, Comet will execute the expression
+        // Results may differ from Spark but should not throw errors
+        checkSparkAnswer("SELECT c0, date_format(c0, 'yyyy-MM-dd') from tbl 
order by c0")
+      }
+    }
+  }
+
   test("unix_date") {
     val r = new Random(42)
     val schema = StructType(Seq(StructField("c0", DataTypes.DateType, true)))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to