This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git

The following commit(s) were added to refs/heads/main by this push:
     new 84dca9fad feat: Support more date part expressions (#2316)
84dca9fad is described below

commit 84dca9fad1163c3a81f47b42b5a490db119953b6
Author: Zhen Wang <643348...@qq.com>
AuthorDate: Wed Sep 17 00:27:31 2025 +0800

    feat: Support more date part expressions (#2316)
---
 docs/source/user-guide/latest/expressions.md      |  32 +++--
 .../org/apache/comet/serde/QueryPlanSerde.scala   |  11 +-
 .../scala/org/apache/comet/serde/datetime.scala   | 132 ++++++++++++++++++++-
 .../org/apache/comet/CometExpressionSuite.scala   |   9 +-
 4 files changed, 162 insertions(+), 22 deletions(-)

diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md
index 5a459866e..c29d0202b 100644
--- a/docs/source/user-guide/latest/expressions.md
+++ b/docs/source/user-guide/latest/expressions.md
@@ -91,19 +91,25 @@ incompatible expressions.
 
 ## Date/Time Functions
 
-| Expression | SQL | Spark-Compatible? | Compatibility Notes |
-| -------------- | ---------------------------- | ----------------- | ----------------------------------------------------------------------------- |
-| DateAdd | `date_add` | Yes | |
-| DateSub | `date_sub` | Yes | |
-| DatePart | `date_part(field, source)` | Yes | Only `year` is supported |
-| Extract | `extract(field FROM source)` | Yes | Only `year` is supported |
-| FromUnixTime | `from_unixtime` | No | Does not support format, supports only -8334601211038 <= sec <= 8210266876799 |
-| Hour | `hour` | Yes | |
-| Minute | `minute` | Yes | |
-| Second | `second` | Yes | |
-| TruncDate | `trunc` | Yes | |
-| TruncTimestamp | `trunc_date` | Yes | |
-| Year | `year` | Yes | |
+| Expression | SQL | Spark-Compatible? | Compatibility Notes |
+| -------------- |------------------------------| ----------------- |------------------------------------------------------------------------------------------------------|
+| DateAdd | `date_add` | Yes | |
+| DateSub | `date_sub` | Yes | |
+| DatePart | `date_part(field, source)` | Yes | Supported values of `field`: `year`/`month`/`week`/`day`/`dayofweek`/`doy`/`quarter`/`hour`/`minute` |
+| Extract | `extract(field FROM source)` | Yes | Supported values of `field`: `year`/`month`/`week`/`day`/`dayofweek`/`doy`/`quarter`/`hour`/`minute` |
+| FromUnixTime | `from_unixtime` | No | Does not support format, supports only -8334601211038 <= sec <= 8210266876799 |
+| Hour | `hour` | Yes | |
+| Minute | `minute` | Yes | |
+| Second | `second` | Yes | |
+| TruncDate | `trunc` | Yes | |
+| TruncTimestamp | `trunc_date` | Yes | |
+| Year | `year` | Yes | |
+| Month | `month` | Yes | |
+| DayOfMonth | `day`/`dayofmonth` | Yes | |
+| DayOfWeek | `dayofweek` | Yes | |
+| DayOfYear | `dayofyear` | Yes | |
+| WeekOfYear | `weekofyear` | Yes | |
+| Quarter | `quarter` | Yes | |
 
 ## Math Expressions
 
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index 0f3b1bf1f..6fd68569f 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -203,7 +203,16 @@ object QueryPlanSerde extends Logging with CometExprShim {
     classOf[Second] -> CometSecond,
     classOf[TruncDate] -> CometTruncDate,
     classOf[TruncTimestamp] -> CometTruncTimestamp,
-    classOf[Year] -> CometYear)
+    classOf[Year] -> CometYear,
+    classOf[Month] -> CometMonth,
+    classOf[DayOfMonth] -> CometDayOfMonth,
+    classOf[DayOfWeek] -> CometDayOfWeek,
+    // FIXME: current datafusion version does not support isodow (WeekDay)
+    // , see: https://github.com/apache/datafusion-comet/issues/2330
+    // classOf[WeekDay] -> CometWeekDay,
+    classOf[DayOfYear] -> CometDayOfYear,
+    classOf[WeekOfYear] -> CometWeekOfYear,
+    classOf[Quarter] -> CometQuarter)
 
   private val conversionExpressions: Map[Class[_ <: Expression], CometExpressionSerde[_]] = Map(
     classOf[Cast] -> CometCast)
diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
index 327254e72..8e4c92d70 100644
--- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
@@ -19,19 +19,43 @@
 package org.apache.comet.serde
 
-import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateSub, Hour, Literal, Minute, Second, TruncDate, TruncTimestamp, Year}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateSub, DayOfMonth, DayOfWeek, DayOfYear, GetDateField, Hour, Literal, Minute, Month, Quarter, Second, TruncDate, TruncTimestamp, WeekDay, WeekOfYear, Year}
 import org.apache.spark.sql.types.{DateType, IntegerType}
 
 import org.apache.comet.CometSparkSessionExtensions.withInfo
+import org.apache.comet.serde.CometGetDateField.CometGetDateField
 import org.apache.comet.serde.ExprOuterClass.Expr
 import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, serializeDataType}
 
-object CometYear extends CometExpressionSerde[Year] {
-  override def convert(
-      expr: Year,
+private object CometGetDateField extends Enumeration {
+  type CometGetDateField = Value
+
+  // See: https://datafusion.apache.org/user-guide/sql/scalar_functions.html#date-part
+  val Year: Value = Value("year")
+  val Month: Value = Value("month")
+  val DayOfMonth: Value = Value("day")
+  // Datafusion: day of the week where Sunday is 0, but spark sunday is 1 (1 = Sunday,
+  // 2 = Monday, ..., 7 = Saturday).
+  val DayOfWeek: Value = Value("dow")
+  val DayOfYear: Value = Value("doy")
+  val WeekDay: Value = Value("isodow") // day of the week where Monday is 0
+  val WeekOfYear: Value = Value("week")
+  val Quarter: Value = Value("quarter")
+}
+
+/**
+ * Convert spark [[org.apache.spark.sql.catalyst.expressions.GetDateField]] expressions to
+ * Datafusion
+ * [[https://datafusion.apache.org/user-guide/sql/scalar_functions.html#date-part datepart]]
+ * function.
+ */
+trait CometExprGetDateField[T <: GetDateField] {
+  def getDateField(
+      expr: T,
+      field: CometGetDateField,
       inputs: Seq[Attribute],
       binding: Boolean): Option[ExprOuterClass.Expr] = {
-    val periodType = exprToProtoInternal(Literal("year"), inputs, binding)
+    val periodType = exprToProtoInternal(Literal(field.toString), inputs, binding)
     val childExpr = exprToProtoInternal(expr.child, inputs, binding)
 
     val optExpr = scalarFunctionExprToProto("datepart", Seq(periodType, childExpr): _*)
       .map(e => {
@@ -51,6 +75,104 @@
   }
 }
 
+object CometYear extends CometExpressionSerde[Year] with CometExprGetDateField[Year] {
+  override def convert(
+      expr: Year,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    getDateField(expr, CometGetDateField.Year, inputs, binding)
+  }
+}
+
+object CometMonth extends CometExpressionSerde[Month] with CometExprGetDateField[Month] {
+  override def convert(
+      expr: Month,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    getDateField(expr, CometGetDateField.Month, inputs, binding)
+  }
+}
+
+object CometDayOfMonth
+    extends CometExpressionSerde[DayOfMonth]
+    with CometExprGetDateField[DayOfMonth] {
+  override def convert(
+      expr: DayOfMonth,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    getDateField(expr, CometGetDateField.DayOfMonth, inputs, binding)
+  }
+}
+
+object CometDayOfWeek
+    extends CometExpressionSerde[DayOfWeek]
+    with CometExprGetDateField[DayOfWeek] {
+  override def convert(
+      expr: DayOfWeek,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    // Datafusion: day of the week where Sunday is 0, but spark sunday is 1 (1 = Sunday,
+    // 2 = Monday, ..., 7 = Saturday). So we need to add 1 to the result of datepart(dow, ...)
+    val optExpr = getDateField(expr, CometGetDateField.DayOfWeek, inputs, binding)
+      .zip(exprToProtoInternal(Literal(1), inputs, binding))
+      .map { case (left, right) =>
+        Expr
+          .newBuilder()
+          .setAdd(
+            ExprOuterClass.MathExpr
+              .newBuilder()
+              .setLeft(left)
+              .setRight(right)
+              .setEvalMode(ExprOuterClass.EvalMode.LEGACY)
+              .setReturnType(serializeDataType(IntegerType).get)
+              .build())
+          .build()
+      }
+      .headOption
+    optExprWithInfo(optExpr, expr, expr.child)
+  }
+}
+
+object CometWeekDay extends CometExpressionSerde[WeekDay] with CometExprGetDateField[WeekDay] {
+  override def convert(
+      expr: WeekDay,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    getDateField(expr, CometGetDateField.WeekDay, inputs, binding)
+  }
+}
+
+object CometDayOfYear
+    extends CometExpressionSerde[DayOfYear]
+    with CometExprGetDateField[DayOfYear] {
+  override def convert(
+      expr: DayOfYear,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    getDateField(expr, CometGetDateField.DayOfYear, inputs, binding)
+  }
+}
+
+object CometWeekOfYear
+    extends CometExpressionSerde[WeekOfYear]
+    with CometExprGetDateField[WeekOfYear] {
+  override def convert(
+      expr: WeekOfYear,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    getDateField(expr, CometGetDateField.WeekOfYear, inputs, binding)
+  }
+}
+
+object CometQuarter extends CometExpressionSerde[Quarter] with CometExprGetDateField[Quarter] {
+  override def convert(
+      expr: Quarter,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    getDateField(expr, CometGetDateField.Quarter, inputs, binding)
+  }
+}
+
 object CometHour extends CometExpressionSerde[Hour] {
   override def convert(
       expr: Hour,
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index ee22c9b97..0e1d4fc24 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -1715,14 +1715,17 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
-  test("Year") {
+  test("DatePart functions: Year/Month/DayOfMonth/DayOfWeek/DayOfYear/WeekOfYear/Quarter") {
     Seq(false, true).foreach { dictionary =>
       withSQLConf("parquet.enable.dictionary" -> dictionary.toString) {
         val table = "test"
         withTable(table) {
           sql(s"create table $table(col timestamp) using parquet")
-          sql(s"insert into $table values (now()), (null)")
-          checkSparkAnswerAndOperator(s"SELECT year(col) FROM $table")
+          sql(s"insert into $table values (now()), (timestamp('1900-01-01')), (null)")
+          // TODO: weekday(col) https://github.com/apache/datafusion-comet/issues/2330
+          checkSparkAnswerAndOperator(
+            "SELECT col, year(col), month(col), day(col)," +
+              s" dayofweek(col), dayofyear(col), weekofyear(col), quarter(col) FROM $table")
         }
       }
     }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org
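For context on the Sunday offset handled by CometDayOfWeek above: DataFusion's
date_part('dow', ...) numbers Sunday as 0, while Spark's dayofweek() numbers
Sunday as 1, which is why the serde adds 1 to the DataFusion result. The small
Scala sketch below only illustrates the Spark-side semantics being matched; it
is not part of the commit, and the object name, sample date, and local-mode
session are hypothetical.

// Minimal sketch (assumptions: local Spark session, sample date 2025-09-14,
// which falls on a Sunday). Spark's dayofweek() returns 1 for Sunday, so a
// DataFusion date_part('dow', ...) result of 0 must be incremented by 1 to
// match, as done in CometDayOfWeek.
import org.apache.spark.sql.SparkSession

object DayOfWeekOffsetExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("dow-example").getOrCreate()
    // Prints 1 (1 = Sunday, 2 = Monday, ..., 7 = Saturday in Spark).
    spark.sql("SELECT dayofweek(DATE '2025-09-14') AS spark_dow").show()
    spark.stop()
  }
}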