This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 5fa1e6942 feat: add support for unix_date expression (#3141)
5fa1e6942 is described below
commit 5fa1e6942afce0eb1ba2d10a4c907f3c416fd4af
Author: Andy Grove <[email protected]>
AuthorDate: Fri Jan 16 13:30:32 2026 -0700
feat: add support for unix_date expression (#3141)
---
docs/source/user-guide/latest/configs.md | 1 +
native/spark-expr/src/conversion_funcs/cast.rs | 6 ++++-
.../org/apache/comet/serde/QueryPlanSerde.scala | 1 +
.../scala/org/apache/comet/serde/datetime.scala | 29 +++++++++++++++++++++-
.../comet/CometTemporalExpressionSuite.scala | 21 ++++++++++++++++
5 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/docs/source/user-guide/latest/configs.md
b/docs/source/user-guide/latest/configs.md
index 1a273ad03..53ed18910 100644
--- a/docs/source/user-guide/latest/configs.md
+++ b/docs/source/user-guide/latest/configs.md
@@ -333,6 +333,7 @@ These settings can be used to determine which parts of the
plan are accelerated
| `spark.comet.expression.TruncTimestamp.enabled` | Enable Comet acceleration for `TruncTimestamp` | true |
| `spark.comet.expression.UnaryMinus.enabled` | Enable Comet acceleration for `UnaryMinus` | true |
| `spark.comet.expression.Unhex.enabled` | Enable Comet acceleration for `Unhex` | true |
+| `spark.comet.expression.UnixDate.enabled` | Enable Comet acceleration for `UnixDate` | true |
| `spark.comet.expression.UnscaledValue.enabled` | Enable Comet acceleration for `UnscaledValue` | true |
| `spark.comet.expression.Upper.enabled` | Enable Comet acceleration for `Upper` | true |
| `spark.comet.expression.WeekDay.enabled` | Enable Comet acceleration for `WeekDay` | true |
diff --git a/native/spark-expr/src/conversion_funcs/cast.rs
b/native/spark-expr/src/conversion_funcs/cast.rs
index 2ff1d8c55..9ccfc3e6a 100644
--- a/native/spark-expr/src/conversion_funcs/cast.rs
+++ b/native/spark-expr/src/conversion_funcs/cast.rs
@@ -1025,6 +1025,10 @@ fn cast_array(
cast_string_to_timestamp(&array, to_type, eval_mode,
&cast_options.timezone)
}
(Utf8, Date32) => cast_string_to_date(&array, to_type, eval_mode),
+ (Date32, Int32) => {
+ // Date32 is stored as days since epoch (i32), so this is a simple reinterpret cast
+ Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?)
+ }
(Utf8, Float32 | Float64) => cast_string_to_float(&array, to_type,
eval_mode),
(Utf8 | LargeUtf8, Decimal128(precision, scale)) => {
cast_string_to_decimal(&array, to_type, precision, scale,
eval_mode)
@@ -1318,7 +1322,7 @@ fn is_datafusion_spark_compatible(from_type: &DataType,
to_type: &DataType) -> b
| DataType::Utf8 // note that there can be formatting
differences
),
DataType::Utf8 => matches!(to_type, DataType::Binary),
- DataType::Date32 => matches!(to_type, DataType::Utf8),
+ DataType::Date32 => matches!(to_type, DataType::Int32 |
DataType::Utf8),
DataType::Timestamp(_, _) => {
matches!(
to_type,
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index e50b1d80e..2849aa6d3 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -186,6 +186,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
private val temporalExpressions: Map[Class[_ <: Expression],
CometExpressionSerde[_]] = Map(
classOf[DateAdd] -> CometDateAdd,
classOf[DateSub] -> CometDateSub,
+ classOf[UnixDate] -> CometUnixDate,
classOf[FromUnixTime] -> CometFromUnixTime,
classOf[Hour] -> CometHour,
classOf[Minute] -> CometMinute,
diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala
b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
index ef2b0f793..252f52478 100644
--- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
@@ -21,7 +21,7 @@ package org.apache.comet.serde
import java.util.Locale
-import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateSub,
DayOfMonth, DayOfWeek, DayOfYear, GetDateField, Hour, Literal, Minute, Month,
Quarter, Second, TruncDate, TruncTimestamp, WeekDay, WeekOfYear, Year}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateSub,
DayOfMonth, DayOfWeek, DayOfYear, GetDateField, Hour, Literal, Minute, Month,
Quarter, Second, TruncDate, TruncTimestamp, UnixDate, WeekDay, WeekOfYear, Year}
import org.apache.spark.sql.types.{DateType, IntegerType}
import org.apache.spark.unsafe.types.UTF8String
@@ -258,6 +258,33 @@ object CometDateAdd extends
CometScalarFunction[DateAdd]("date_add")
object CometDateSub extends CometScalarFunction[DateSub]("date_sub")
+/**
+ * Converts a date to the number of days since Unix epoch (1970-01-01). Since dates are internally
+ * stored as days since epoch, this is a simple cast to integer.
+ */
+object CometUnixDate extends CometExpressionSerde[UnixDate] {
+ override def convert(
+ expr: UnixDate,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ val childExpr = exprToProtoInternal(expr.child, inputs, binding)
+ val optExpr = childExpr.map { child =>
+ Expr
+ .newBuilder()
+ .setCast(
+ ExprOuterClass.Cast
+ .newBuilder()
+ .setChild(child)
+ .setDatatype(serializeDataType(IntegerType).get)
+ .setEvalMode(ExprOuterClass.EvalMode.LEGACY)
+ .setAllowIncompat(false)
+ .build())
+ .build()
+ }
+ optExprWithInfo(optExpr, expr, expr.child)
+ }
+}
+
object CometTruncDate extends CometExpressionSerde[TruncDate] {
val supportedFormats: Seq[String] =
diff --git
a/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
b/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
index 9a23c76d8..3ab525ab6 100644
--- a/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
@@ -122,4 +122,25 @@ class CometTemporalExpressionSuite extends CometTestBase
with AdaptiveSparkPlanH
StructField("fmt", DataTypes.StringType, true)))
FuzzDataGenerator.generateDataFrame(r, spark, schema, 1000,
DataGenOptions())
}
+
+ test("unix_date") {
+ val r = new Random(42)
+ val schema = StructType(Seq(StructField("c0", DataTypes.DateType, true)))
+ val df = FuzzDataGenerator.generateDataFrame(r, spark, schema, 1000,
DataGenOptions())
+ df.createOrReplaceTempView("tbl")
+
+ // Basic test
+ checkSparkAnswerAndOperator("SELECT c0, unix_date(c0) FROM tbl ORDER BY
c0")
+
+ // Test with literal dates
+ checkSparkAnswerAndOperator(
+ "SELECT unix_date(DATE('1970-01-01')), unix_date(DATE('1970-01-02')),
unix_date(DATE('2024-01-01'))")
+
+ // Test dates before Unix epoch (should return negative values)
+ checkSparkAnswerAndOperator(
+ "SELECT unix_date(DATE('1969-12-31')), unix_date(DATE('1960-01-01'))")
+
+ // Test null handling
+ checkSparkAnswerAndOperator("SELECT unix_date(NULL)")
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]