bart-samwel commented on a change in pull request #28576:
URL: https://github.com/apache/spark/pull/28576#discussion_r427401584
##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
##########
@@ -31,17 +31,50 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy._
trait DateTimeFormatterHelper {
+  private def getOrDefault(accessor: TemporalAccessor, field: ChronoField, default: Int): Int = {
+    if (accessor.isSupported(field)) {
+      accessor.get(field)
+    } else {
+      default
+    }
+  }
+
+  protected def toLocalDate(accessor: TemporalAccessor, allowMissingYear: Boolean): LocalDate = {
+    val year = if (accessor.isSupported(ChronoField.YEAR)) {
+      accessor.get(ChronoField.YEAR)
+    } else if (allowMissingYear) {
+      // To keep backward compatibility with Spark 2.x, we pick 1970 as the default value of year.
+      1970
+    } else {
+      throw new SparkUpgradeException("3.0",
+        "Year must be given in the date/timestamp string to be parsed. You can set " +
Review comment:
Maybe also suggest the alternative workaround, e.g. prepending `'1970 '` to the string-to-be-parsed and prepending `'yyyy '` to the format string. That one works without the legacy setting, so I'd say it's preferred.
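For reference, a minimal self-contained sketch of that workaround using plain java.time (the input value and pattern here are illustrative, not from the PR):

```scala
import java.time.LocalDate
import java.time.format.DateTimeFormatter

// The input has no year, so prepend a literal "1970 " to the value
// and a matching "yyyy " to the format string before parsing.
val input = "12-31"
val formatter = DateTimeFormatter.ofPattern("yyyy MM-dd")
val parsed = LocalDate.parse("1970 " + input, formatter)
// parsed == LocalDate.of(1970, 12, 31)
```

This gives the same 1970 default without touching any configuration.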
##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
##########
@@ -31,17 +31,50 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy._
trait DateTimeFormatterHelper {
+  private def getOrDefault(accessor: TemporalAccessor, field: ChronoField, default: Int): Int = {
+    if (accessor.isSupported(field)) {
+      accessor.get(field)
+    } else {
+      default
+    }
+  }
+
+  protected def toLocalDate(accessor: TemporalAccessor, allowMissingYear: Boolean): LocalDate = {
+    val year = if (accessor.isSupported(ChronoField.YEAR)) {
+      accessor.get(ChronoField.YEAR)
+    } else if (allowMissingYear) {
+      // To keep backward compatibility with Spark 2.x, we pick 1970 as the default value of year.
+      1970
+    } else {
+      throw new SparkUpgradeException("3.0",
+        "Year must be given in the date/timestamp string to be parsed. You can set " +
+          SQLConf.LEGACY_ALLOW_MISSING_YEAR_DURING_PARSING.key + " to true, to pick 1970 as " +
+          "the default value of year.", null)
+    }
+    val month = getOrDefault(accessor, ChronoField.MONTH_OF_YEAR, 1)
Review comment:
Are we also going to error out if they specify the day but not the month? Really, the only formats that make sense are the ones where a full prefix of the y-m-d h-m-s sequence is given; all others are likely a mistake (e.g. asking for "mm" twice when they meant "MM").
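One possible shape for that "full prefix" check, sketched here for illustration only (the field list and helper name are assumptions, not code from this PR):

```scala
import java.time.temporal.{ChronoField, TemporalAccessor}

// Fields ordered from coarsest to finest: the y-m-d h-m-s sequence.
val fieldSequence: Seq[ChronoField] = Seq(
  ChronoField.YEAR,
  ChronoField.MONTH_OF_YEAR,
  ChronoField.DAY_OF_MONTH,
  ChronoField.HOUR_OF_DAY,
  ChronoField.MINUTE_OF_HOUR,
  ChronoField.SECOND_OF_MINUTE)

// A parse result is a "full prefix" if, once one field is missing,
// no finer-grained field is present either.
def isFullPrefix(accessor: TemporalAccessor): Boolean =
  fieldSequence
    .map(accessor.isSupported)
    .dropWhile(identity) // skip the leading run of present fields
    .forall(!_)          // nothing after the first gap may be present
```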
##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
##########
@@ -2586,6 +2586,15 @@ object SQLConf {
.checkValue(_ > 0, "The timeout value must be positive")
.createWithDefault(10L)
+  val LEGACY_ALLOW_MISSING_YEAR_DURING_PARSING =
+    buildConf("spark.sql.legacy.allowMissingYearDuringParsing")
+      .internal()
+      .doc("When true, DateFormatter/TimestampFormatter allows parsing date/timestamp string " +
Review comment:
Here too, you could suggest the alternative workaround that doesn't require setting the legacy flag.
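For context, here is how the two options would look side by side (a sketch; it assumes an existing SparkSession named `spark` and the conf key exactly as proposed in this PR):

```scala
// Option 1: the legacy flag, restoring the Spark 2.x default of year 1970.
spark.conf.set("spark.sql.legacy.allowMissingYearDuringParsing", "true")
spark.sql("SELECT to_timestamp('12-31 10:00', 'MM-dd HH:mm')").show()

// Option 2 (flag-free): prepend a literal year to both value and pattern.
spark.sql("SELECT to_timestamp('1970 12-31 10:00', 'yyyy MM-dd HH:mm')").show()
```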