cloud-fan commented on a change in pull request #32995:
URL: https://github.com/apache/spark/pull/32995#discussion_r655537147
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
##########
@@ -66,6 +66,10 @@ trait TimestampFormatterHelper extends
TimeZoneAwareExpression {
protected def isParsing: Boolean
+ // Whether the timestamp formatter is for TimestampWithoutTZType.
+ // If yes, the formatter is always `Iso8601TimestampFormatter`.
Review comment:
Shall we put this logic in `TimestampFormatterHelper`?
```
final protected def getFormatter(fmt: String): TimestampFormatter = {
if (forTimestampWithoutTZ) new Iso8601TimestampFormatter(...) else ...
}
```
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
##########
@@ -995,6 +1000,77 @@ case class UnixTimestamp(
copy(timeExp = newLeft, format = newRight)
}
+case class GetTimestampWithoutTZ(
+ left: Expression,
+ right: Expression,
+ timeZoneId: Option[String] = None,
+ failOnError: Boolean = SQLConf.get.ansiEnabled) extends ToTimestamp {
+
+ override lazy val forTimestampWithoutTZ: Boolean = true
Review comment:
it's a constant, no need to make it a lazy val.
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
##########
@@ -1013,7 +1089,7 @@ abstract class ToTimestamp
override def dataType: DataType = LongType
override def nullable: Boolean = if (failOnError)
children.exists(_.nullable) else true
- private def isParseError(e: Throwable): Boolean = e match {
+ protected def isParseError(e: Throwable): Boolean = e match {
Review comment:
unnecessary change?
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
##########
@@ -50,9 +51,31 @@ sealed trait TimestampFormatter extends Serializable {
@throws(classOf[DateTimeException])
def parse(s: String): Long
+ /**
+ * Parses a timestamp in a string and converts it to microseconds since Unix
Epoch in local time.
+ *
+ * @param s - string with timestamp to parse
+ * @return microseconds since epoch.
+ * @throws ParseException can be thrown by legacy parser
+ * @throws DateTimeParseException can be thrown by new parser
+ * @throws DateTimeException unable to obtain local date or time
+ * @throws UnsupportedOperationException cannot use legacy formatter for the
parsing
+ */
+ @throws(classOf[ParseException])
+ @throws(classOf[DateTimeParseException])
+ @throws(classOf[DateTimeException])
+ @throws(classOf[UnsupportedOperationException])
+ def parseWithoutTimeZone(s: String): Long =
+ throw QueryExecutionErrors.cannotUseLegacyFormatterForTimestampWithoutTZ();
Review comment:
This is not a user-facing error: we should never hit it. We can just
throw an IllegalStateException here, and there is no need to document this with `@throws`
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
##########
@@ -50,9 +51,31 @@ sealed trait TimestampFormatter extends Serializable {
@throws(classOf[DateTimeException])
def parse(s: String): Long
+ /**
+ * Parses a timestamp in a string and converts it to microseconds since Unix
Epoch in local time.
+ *
+ * @param s - string with timestamp to parse
+ * @return microseconds since epoch.
+ * @throws ParseException can be thrown by legacy parser
+ * @throws DateTimeParseException can be thrown by new parser
+ * @throws DateTimeException unable to obtain local date or time
+ * @throws UnsupportedOperationException cannot use legacy formatter for the
parsing
+ */
+ @throws(classOf[ParseException])
+ @throws(classOf[DateTimeParseException])
+ @throws(classOf[DateTimeException])
+ @throws(classOf[UnsupportedOperationException])
+ def parseWithoutTimeZone(s: String): Long =
+ throw QueryExecutionErrors.cannotUseLegacyFormatterForTimestampWithoutTZ();
+
def format(us: Long): String
def format(ts: Timestamp): String
def format(instant: Instant): String
+
+ @throws(classOf[UnsupportedOperationException])
Review comment:
ditto
##########
File path: sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
##########
@@ -936,6 +1198,33 @@ org.apache.spark.SparkUpgradeException
You may get a different result due to the upgrading of Spark 3.0: Fail to
recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set
spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before
Spark 3.0. 2) You can form a valid datetime pattern with the guide from
https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+-- !query
+select to_timestamp_ntz('2019-10-06 A', 'yyyy-MM-dd GGGGG')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkUpgradeException
+You may get a different result due to the upgrading of Spark 3.0: Fail to
recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set
spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before
Spark 3.0. 2) You can form a valid datetime pattern with the guide from
https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
Review comment:
good point. The new function should bypass this upgrade check in the
timestamp formatter.
##########
File path:
sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
##########
@@ -379,6 +379,7 @@ object SparkExecuteStatementOperation {
case CalendarIntervalType => StringType.catalogString
case _: YearMonthIntervalType => "interval_year_month"
case _: DayTimeIntervalType => "interval_day_time"
+ case _: TimestampWithoutTZType => "timestamp"
Review comment:
We should probably map the existing timestamp to `TIMESTAMPLOCALTZ`
##########
File path:
sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
##########
@@ -379,6 +379,7 @@ object SparkExecuteStatementOperation {
case CalendarIntervalType => StringType.catalogString
case _: YearMonthIntervalType => "interval_year_month"
case _: DayTimeIntervalType => "interval_day_time"
+ case _: TimestampWithoutTZType => "timestamp"
Review comment:
We should probably map the existing timestamp to `TIMESTAMPLOCALTZ`, we
can discuss this in an individual PR.
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
##########
@@ -1225,6 +1225,12 @@ object QueryExecutionErrors {
new RuntimeException("Remote operations not supported")
}
+ def missingMethodForTimestampWithoutTZ(method: String): Throwable = {
+ new IllegalStateException(
Review comment:
This is not a user-facing error and we should just inline it, similar to
`UnresolvedAttribute.exprId`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]