MaxGekk commented on code in PR #56637:
URL: https://github.com/apache/spark/pull/56637#discussion_r3450943095
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -1183,6 +1225,19 @@ case class DateFormatClass(left: Expression, right:
Expression, timeZoneId: Opti
final override def nodePatternsInternal(): Seq[TreePattern] = Seq(DATETIME)
}
+object DateFormatClass {
+ /** Helper for codegen: formats time with proper Spark error on invalid
pattern. */
+ def formatTimeWithError(
+ tf: TimeFormatter, nanos: Long, funcName: String, pattern: String):
UTF8String = {
+ try {
+ UTF8String.fromString(tf.format(nanos))
+ } catch {
+ case e: java.time.DateTimeException =>
+ throw QueryExecutionErrors.invalidPatternError(funcName, pattern, e)
Review Comment:
Following up on my own earlier suggestion to reuse this helper — having now
seen the rendered message, it's misleading for these functions.
`invalidPatternError` hardcodes `parameter -> toSQLId("regexp")` (it was
written for regexp functions), so this produces `The value of parameter(s)
``regexp`` in ``to_char`` is invalid: '<pattern>'` — but
`to_char`/`to_varchar`/`date_format` have no `regexp` parameter. And `funcName`
is the literal `"to_char"` here and at the two codegen sites, yet
`DateFormatClass` also backs `date_format` and `to_varchar`, so those are
misreported.
Minimal fix is to pass `prettyName` instead of the literal. Fully dropping
the `regexp` parameter would need a datetime-specific message rather than the
regexp-oriented helper — your call whether that's worth it here, but the
message is what users will see.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -1139,34 +1139,76 @@ case class DateFormatClass(left: Expression, right:
Expression, timeZoneId: Opti
def this(left: Expression, right: Expression) = this(left, right, None)
override def inputTypes: Seq[AbstractDataType] =
- Seq(TimestampType, StringTypeWithCollation(supportsTrimCollation = true))
+ Seq(TypeCollection(TimestampType, AnyTimeType),
+ StringTypeWithCollation(supportsTrimCollation = true))
override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
copy(timeZoneId = Option(timeZoneId))
override protected def nullSafeEval(timestamp: Any, format: Any): Any = {
- val formatter = formatterOption.getOrElse(getFormatter(format.toString))
- UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long]))
+ left.dataType match {
+ case _: TimeType =>
+ val tf = timeFormatterOption.getOrElse(
+ TimeFormatter(format.toString, isParsing = false))
+ DateFormatClass.formatTimeWithError(
+ tf, timestamp.asInstanceOf[Long], "to_char", format.toString)
+ case _ =>
+ val formatter =
formatterOption.getOrElse(getFormatter(format.toString))
+ UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long]))
+ }
}
+ @transient private lazy val timeFormatterOption: Option[TimeFormatter] =
+ if (left.dataType.isInstanceOf[TimeType] && right.foldable) {
+ Option(right.eval()).map(fmt => TimeFormatter(fmt.toString, isParsing =
false))
+ } else None
+
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- formatterOption.map { tf =>
- val timestampFormatter = ctx.addReferenceObj("timestampFormatter", tf)
- defineCodeGen(ctx, ev, (timestamp, _) => {
- s"""UTF8String.fromString($timestampFormatter.format($timestamp))"""
- })
- }.getOrElse {
- val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
- val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$")
- val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
- defineCodeGen(ctx, ev, (timestamp, format) => {
- s"""|UTF8String.fromString($tf$$.MODULE$$.apply(
- | $format.toString(),
- | $zid,
- | $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(),
- | false)
- |.format($timestamp))""".stripMargin
- })
+ left.dataType match {
+ case _: TimeType =>
+ val errClass = QueryExecutionErrors.getClass.getName.stripSuffix("$")
Review Comment:
`errClass` is never used — the error wrapping now lives inside
`formatTimeWithError`, so this is leftover. Remove:
```suggestion
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -1183,6 +1225,19 @@ case class DateFormatClass(left: Expression, right:
Expression, timeZoneId: Opti
final override def nodePatternsInternal(): Seq[TreePattern] = Seq(DATETIME)
}
+object DateFormatClass {
+ /** Helper for codegen: formats time with proper Spark error on invalid
pattern. */
Review Comment:
This helper is called from both `nullSafeEval` (interpreted) and
`doGenCode`, not codegen only:
```suggestion
/** Formats a TIME value, mapping an invalid pattern to a Spark error.
Used by both eval and codegen. */
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]