[GitHub] [spark] MaxGekk commented on a diff in pull request #40237: [SPARK-42635][SQL] Fix the TimestampAdd expression.

2023-03-02 Thread via GitHub


MaxGekk commented on code in PR #40237:
URL: https://github.com/apache/spark/pull/40237#discussion_r1122804677


##
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala:
##
@@ -1961,6 +1961,99 @@ class DateExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 }
   }
 
+  test("SPARK-42635: timestampadd near daylight saving transition") {
+// In America/Los_Angeles timezone, timestamp value `skippedTime` is 
2011-03-13 03:00:00.
+// The next second of 2011-03-13 01:59:59 jumps to 2011-03-13 03:00:00.
+val skippedTime = 13000104L
+// In America/Los_Angeles timezone, both timestamp range `[repeatedTime - 
MICROS_PER_HOUR,
+// repeatedTime)` and `[repeatedTime, repeatedTime + MICROS_PER_HOUR)` map 
to
+// [2011-11-06 01:00:00, 2011-11-06 02:00:00).
+// The next second of 2011-11-06 01:59:59 (pre-transition) jumps back to 
2011-11-06 01:00:00.
+val repeatedTime = 13205700L
+withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> LA.getId) {
+  // Adding one day is **not** equivalent to adding _PER_DAY time 
units, because not every
+  // day has 24 hours: 2011-03-13 has 23 hours, 2011-11-06 has 25 hours.
+
+  // timestampadd(DAY, 1, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+  checkEvaluation(
+TimestampAdd("DAY", Literal(1), Literal(skippedTime - 23 * 
MICROS_PER_HOUR, TimestampType)),
+skippedTime)
+  // timestampadd(HOUR, 24, 2011-03-12 03:00:00) = 2011-03-13 04:00:00
+  checkEvaluation(
+TimestampAdd("HOUR", Literal(24),
+  Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+skippedTime + MICROS_PER_HOUR)
+  // timestampadd(HOUR, 23, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+  checkEvaluation(
+TimestampAdd("HOUR", Literal(23),
+  Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+skippedTime)
+  // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 
2011-03-13 04:00:00
+  checkEvaluation(
+TimestampAdd(
+  "SECOND", Literal(SECONDS_PER_DAY.toInt),
+  Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+skippedTime + MICROS_PER_HOUR)
+  // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 
2011-03-13 03:59:59
+  checkEvaluation(
+TimestampAdd(
+  "SECOND", Literal(SECONDS_PER_DAY.toInt - 1),
+  Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+skippedTime + MICROS_PER_HOUR - MICROS_PER_SECOND)
+
+  // timestampadd(DAY, 1, 2011-11-05 02:00:00) = 2011-11-06 02:00:00
+  checkEvaluation(
+TimestampAdd("DAY", Literal(1),
+  Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime + MICROS_PER_HOUR)
+  // timestampadd(DAY, 1, 2011-11-05 01:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+  checkEvaluation(
+TimestampAdd("DAY", Literal(1),
+  Literal(repeatedTime - 25 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime - MICROS_PER_HOUR)
+  // timestampadd(DAY, -1, 2011-11-07 01:00:00) = 2011-11-06 01:00:00 
(post-transition)
+  checkEvaluation(
+TimestampAdd("DAY", Literal(-1),
+  Literal(repeatedTime + 24 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime)
+  // timestampadd(MONTH, 1, 2011-10-06 01:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+  checkEvaluation(
+TimestampAdd(
+  "MONTH", Literal(1),
+  Literal(repeatedTime - MICROS_PER_HOUR - 31 * MICROS_PER_DAY, 
TimestampType)),
+repeatedTime - MICROS_PER_HOUR)
+  // timestampadd(MONTH, -1, 2011-12-06 01:00:00) = 2011-11-06 01:00:00 
(post-transition)
+  checkEvaluation(
+TimestampAdd(
+  "MONTH", Literal(-1),
+  Literal(repeatedTime + 30 * MICROS_PER_DAY, TimestampType)),
+repeatedTime)
+  // timestampadd(HOUR, 23, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+  checkEvaluation(
+TimestampAdd("HOUR", Literal(23),
+  Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime - MICROS_PER_HOUR)
+  // timestampadd(HOUR, 24, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 
(post-transition)
+  checkEvaluation(
+TimestampAdd("HOUR", Literal(24),
+  Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime)
+}
+  }
+
+  test("SPARK-42635: timestampadd unit conversion overflow") {
+withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+  checkExceptionInExpression[SparkArithmeticException](TimestampAdd("DAY",

Review Comment:
   Could you use `checkErrorInExpression` instead of 
`checkExceptionInExpression`, please. This will allow to avoid the dependency 
of error message, so, tech editors will be able to modify `error-classes.json` 
w/o modifying Spark tests.



-- 
This is an automated 

[GitHub] [spark] MaxGekk commented on a diff in pull request #40237: [SPARK-42635][SQL] Fix the TimestampAdd expression.

2023-03-02 Thread via GitHub


MaxGekk commented on code in PR #40237:
URL: https://github.com/apache/spark/pull/40237#discussion_r1122804677


##
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala:
##
@@ -1961,6 +1961,99 @@ class DateExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 }
   }
 
+  test("SPARK-42635: timestampadd near daylight saving transition") {
+// In America/Los_Angeles timezone, timestamp value `skippedTime` is 
2011-03-13 03:00:00.
+// The next second of 2011-03-13 01:59:59 jumps to 2011-03-13 03:00:00.
+val skippedTime = 13000104L
+// In America/Los_Angeles timezone, both timestamp range `[repeatedTime - 
MICROS_PER_HOUR,
+// repeatedTime)` and `[repeatedTime, repeatedTime + MICROS_PER_HOUR)` map 
to
+// [2011-11-06 01:00:00, 2011-11-06 02:00:00).
+// The next second of 2011-11-06 01:59:59 (pre-transition) jumps back to 
2011-11-06 01:00:00.
+val repeatedTime = 13205700L
+withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> LA.getId) {
+  // Adding one day is **not** equivalent to adding _PER_DAY time 
units, because not every
+  // day has 24 hours: 2011-03-13 has 23 hours, 2011-11-06 has 25 hours.
+
+  // timestampadd(DAY, 1, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+  checkEvaluation(
+TimestampAdd("DAY", Literal(1), Literal(skippedTime - 23 * 
MICROS_PER_HOUR, TimestampType)),
+skippedTime)
+  // timestampadd(HOUR, 24, 2011-03-12 03:00:00) = 2011-03-13 04:00:00
+  checkEvaluation(
+TimestampAdd("HOUR", Literal(24),
+  Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+skippedTime + MICROS_PER_HOUR)
+  // timestampadd(HOUR, 23, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+  checkEvaluation(
+TimestampAdd("HOUR", Literal(23),
+  Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+skippedTime)
+  // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 
2011-03-13 04:00:00
+  checkEvaluation(
+TimestampAdd(
+  "SECOND", Literal(SECONDS_PER_DAY.toInt),
+  Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+skippedTime + MICROS_PER_HOUR)
+  // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 
2011-03-13 03:59:59
+  checkEvaluation(
+TimestampAdd(
+  "SECOND", Literal(SECONDS_PER_DAY.toInt - 1),
+  Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+skippedTime + MICROS_PER_HOUR - MICROS_PER_SECOND)
+
+  // timestampadd(DAY, 1, 2011-11-05 02:00:00) = 2011-11-06 02:00:00
+  checkEvaluation(
+TimestampAdd("DAY", Literal(1),
+  Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime + MICROS_PER_HOUR)
+  // timestampadd(DAY, 1, 2011-11-05 01:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+  checkEvaluation(
+TimestampAdd("DAY", Literal(1),
+  Literal(repeatedTime - 25 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime - MICROS_PER_HOUR)
+  // timestampadd(DAY, -1, 2011-11-07 01:00:00) = 2011-11-06 01:00:00 
(post-transition)
+  checkEvaluation(
+TimestampAdd("DAY", Literal(-1),
+  Literal(repeatedTime + 24 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime)
+  // timestampadd(MONTH, 1, 2011-10-06 01:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+  checkEvaluation(
+TimestampAdd(
+  "MONTH", Literal(1),
+  Literal(repeatedTime - MICROS_PER_HOUR - 31 * MICROS_PER_DAY, 
TimestampType)),
+repeatedTime - MICROS_PER_HOUR)
+  // timestampadd(MONTH, -1, 2011-12-06 01:00:00) = 2011-11-06 01:00:00 
(post-transition)
+  checkEvaluation(
+TimestampAdd(
+  "MONTH", Literal(-1),
+  Literal(repeatedTime + 30 * MICROS_PER_DAY, TimestampType)),
+repeatedTime)
+  // timestampadd(HOUR, 23, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+  checkEvaluation(
+TimestampAdd("HOUR", Literal(23),
+  Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime - MICROS_PER_HOUR)
+  // timestampadd(HOUR, 24, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 
(post-transition)
+  checkEvaluation(
+TimestampAdd("HOUR", Literal(24),
+  Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+repeatedTime)
+}
+  }
+
+  test("SPARK-42635: timestampadd unit conversion overflow") {
+withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+  checkExceptionInExpression[SparkArithmeticException](TimestampAdd("DAY",

Review Comment:
   Could you use `checkErrorInExpression` instead of 
`checkExceptionInExpression`, please. This will allow to avoid the dependency 
of error message, so, tech editors will be able to modify `error-classes.json` 
w/o the modifying Spark tests.



##