cloud-fan commented on a change in pull request #32995:
URL: https://github.com/apache/spark/pull/32995#discussion_r655537147
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
##########
@@ -66,6 +66,10 @@ trait TimestampFormatterHelper extends
TimeZoneAwareExpression {
protected def isParsing: Boolean
+ // Whether the timestamp formatter is for TimestampWithoutTZType.
+ // If yes, the formatter is always `Iso8601TimestampFormatter`.
Review comment:
Shall we put this logic in `TimestampFormatterHelper`?
```
final protected def getFormatter(fmt: String): TimestampFormatter = {
if (forTimestampWithoutTZ) new Iso8601TimestampFormatter(...) else ...
}
```
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
##########
@@ -995,6 +1000,77 @@ case class UnixTimestamp(
copy(timeExp = newLeft, format = newRight)
}
+case class GetTimestampWithoutTZ(
+ left: Expression,
+ right: Expression,
+ timeZoneId: Option[String] = None,
+ failOnError: Boolean = SQLConf.get.ansiEnabled) extends ToTimestamp {
+
+ override lazy val forTimestampWithoutTZ: Boolean = true
Review comment:
it's a constant, no need to make it a lazy val.
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
##########
@@ -1013,7 +1089,7 @@ abstract class ToTimestamp
override def dataType: DataType = LongType
override def nullable: Boolean = if (failOnError)
children.exists(_.nullable) else true
- private def isParseError(e: Throwable): Boolean = e match {
+ protected def isParseError(e: Throwable): Boolean = e match {
Review comment:
unnecessary change?
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
##########
@@ -50,9 +51,31 @@ sealed trait TimestampFormatter extends Serializable {
@throws(classOf[DateTimeException])
def parse(s: String): Long
+ /**
+ * Parses a timestamp in a string and converts it to microseconds since Unix
Epoch in local time.
+ *
+ * @param s - string with timestamp to parse
+ * @return microseconds since epoch.
+ * @throws ParseException can be thrown by legacy parser
+ * @throws DateTimeParseException can be thrown by new parser
+ * @throws DateTimeException unable to obtain local date or time
+ * @throws UnsupportedOperationException cannot use legacy formatter for the
parsing
+ */
+ @throws(classOf[ParseException])
+ @throws(classOf[DateTimeParseException])
+ @throws(classOf[DateTimeException])
+ @throws(classOf[UnsupportedOperationException])
+ def parseWithoutTimeZone(s: String): Long =
+ throw QueryExecutionErrors.cannotUseLegacyFormatterForTimestampWithoutTZ();
Review comment:
This is not a user-facing error: we should never hit it. We can just
throw an IllegalStateException here, and there is no need to document this with `@throws`
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
##########
@@ -50,9 +51,31 @@ sealed trait TimestampFormatter extends Serializable {
@throws(classOf[DateTimeException])
def parse(s: String): Long
+ /**
+ * Parses a timestamp in a string and converts it to microseconds since Unix
Epoch in local time.
+ *
+ * @param s - string with timestamp to parse
+ * @return microseconds since epoch.
+ * @throws ParseException can be thrown by legacy parser
+ * @throws DateTimeParseException can be thrown by new parser
+ * @throws DateTimeException unable to obtain local date or time
+ * @throws UnsupportedOperationException cannot use legacy formatter for the
parsing
+ */
+ @throws(classOf[ParseException])
+ @throws(classOf[DateTimeParseException])
+ @throws(classOf[DateTimeException])
+ @throws(classOf[UnsupportedOperationException])
+ def parseWithoutTimeZone(s: String): Long =
+ throw QueryExecutionErrors.cannotUseLegacyFormatterForTimestampWithoutTZ();
+
def format(us: Long): String
def format(ts: Timestamp): String
def format(instant: Instant): String
+
+ @throws(classOf[UnsupportedOperationException])
Review comment:
ditto
##########
File path: sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
##########
@@ -936,6 +1198,33 @@ org.apache.spark.SparkUpgradeException
You may get a different result due to the upgrading of Spark 3.0: Fail to
recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set
spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before
Spark 3.0. 2) You can form a valid datetime pattern with the guide from
https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+-- !query
+select to_timestamp_ntz('2019-10-06 A', 'yyyy-MM-dd GGGGG')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkUpgradeException
+You may get a different result due to the upgrading of Spark 3.0: Fail to
recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set
spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before
Spark 3.0. 2) You can form a valid datetime pattern with the guide from
https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
Review comment:
good point. The new function should bypass this upgrade check in the
timestamp formatter.
##########
File path:
sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
##########
@@ -379,6 +379,7 @@ object SparkExecuteStatementOperation {
case CalendarIntervalType => StringType.catalogString
case _: YearMonthIntervalType => "interval_year_month"
case _: DayTimeIntervalType => "interval_day_time"
+ case _: TimestampWithoutTZType => "timestamp"
Review comment:
We should probably map the existing timestamp to `TIMESTAMPLOCALTZ`
##########
File path:
sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
##########
@@ -379,6 +379,7 @@ object SparkExecuteStatementOperation {
case CalendarIntervalType => StringType.catalogString
case _: YearMonthIntervalType => "interval_year_month"
case _: DayTimeIntervalType => "interval_day_time"
+ case _: TimestampWithoutTZType => "timestamp"
Review comment:
We should probably map the existing timestamp to `TIMESTAMPLOCALTZ`, we
can discuss this in an individual PR.
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
##########
@@ -1225,6 +1225,12 @@ object QueryExecutionErrors {
new RuntimeException("Remote operations not supported")
}
+ def missingMethodForTimestampWithoutTZ(method: String): Throwable = {
+ new IllegalStateException(
Review comment:
This is not a user-facing error and we should just inline it, similar to
`UnresolvedAttribute.exprId`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]