MaxGekk commented on a change in pull request #27807: [SPARK-31076][SQL] Convert Catalyst's DATE/TIMESTAMP to Java Date/Timestamp via local date-time
URL: https://github.com/apache/spark/pull/27807#discussion_r390482084
 
 

 ##########
 File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 ##########
 @@ -86,28 +95,50 @@ object DateTimeUtils {
    * Returns the number of days since epoch from java.sql.Date.
    */
   def fromJavaDate(date: Date): SQLDate = {
-    microsToDays(millisToMicros(date.getTime))
+    if (date.getTime < GREGORIAN_CUTOVER_MILLIS) {
+      val era = if (date.before(julianCommonEraStart)) 0 else 1
+      val localDate = date.toLocalDate.`with`(ChronoField.ERA, era)
+      localDateToDays(localDate)
+    } else {
+      microsToDays(millisToMicros(date.getTime))
+    }
   }
 
   /**
    * Returns a java.sql.Date from number of days since epoch.
    */
   def toJavaDate(daysSinceEpoch: SQLDate): Date = {
-    new Date(microsToMillis(daysToMicros(daysSinceEpoch)))
+    if (daysSinceEpoch < GREGORIAN_CUTOVER_DAY) {
+      Date.valueOf(LocalDate.ofEpochDay(daysSinceEpoch))
+    } else {
+      new Date(microsToMillis(daysToMicros(daysSinceEpoch)))
+    }
   }
 
   /**
    * Returns a java.sql.Timestamp from number of micros since epoch.
    */
   def toJavaTimestamp(us: SQLTimestamp): Timestamp = {
-    Timestamp.from(microsToInstant(us))
+    if (us < GREGORIAN_CUTOVER_MICROS) {
+      val ldt = microsToInstant(us).atZone(ZoneId.systemDefault()).toLocalDateTime
+      Timestamp.valueOf(ldt)
+    } else {
+      Timestamp.from(microsToInstant(us))
+    }
   }
 
   /**
    * Returns the number of micros since epoch from java.sql.Timestamp.
    */
   def fromJavaTimestamp(t: Timestamp): SQLTimestamp = {
-    instantToMicros(t.toInstant)
+    if (t.getTime < GREGORIAN_CUTOVER_MILLIS) {
+      val era = if (t.before(julianCommonEraStart)) 0 else 1
+      val localDateTime = t.toLocalDateTime.`with`(ChronoField.ERA, era)
+      val instant = ZonedDateTime.of(localDateTime, ZoneId.systemDefault()).toInstant
+      instantToMicros(instant)
+    } else {
+      instantToMicros(t.toInstant)
+    }
   }
 
 Review comment:
   My five cents in addition to Wenchen's answers: the `java.sql.Timestamp`/`java.sql.Date`/`java.util.Date` classes have mixed semantics. They hold a "physical" duration in milliseconds (plus nanos for `Timestamp`) together with a zoned date-time view. The duration from the epoch does not depend on any calendar; the classes use a calendar only when converting to/from the zoned date-time view. For example, when you take the `year` component via `getYear()`:
   ```java
       public int getYear() {
           return normalize().getYear() - 1900;
       }
   ```
   The `normalize()` method materializes `year`, `month`, `hour`, etc. using the combined Julian + Gregorian calendar.
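   
   To make the mixed semantics concrete, here is a minimal sketch (my expected output, not an actual run; default time zone assumed, and the 7-day shift for year 1100 matches the `collect()` example below):
   ```scala
   import java.time.{LocalDate, ZoneId}
   
   // The millis of an instant are calendar-independent; only the getters
   // apply the hybrid Julian + Gregorian calendar.
   val millis = LocalDate.of(1100, 10, 10)  // proleptic Gregorian date
     .atStartOfDay(ZoneId.systemDefault())
     .toInstant.toEpochMilli
   val d = new java.sql.Date(millis)
   println(d.getYear + 1900)  // 1100, computed via the hybrid calendar
   println(d)                 // expected: 1100-10-03, the Julian view
   ```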
   
   If you use `java.sql.Timestamp`/`Date` as a "container" for a duration, that is fine. For example, we can return `java.sql.Timestamp`/`Date` from `collect()` and extract the duration:
   ```scala
   scala> val ts = sql("select date '1100-10-10'").collect()(0).getDate(0)
   ts: java.sql.Date = 1100-10-03
   scala> java.time.Instant.ofEpochMilli(ts.getTime).atZone(java.time.ZoneId.of("Europe/Moscow")).toLocalDate
   res2: java.time.LocalDate = 1100-10-10
   ```
   but if you call methods of `java.sql.Timestamp`/`java.sql.Date` that go through the calendar fields, they may return unexpected values for dates before `1582-10-15`.
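   
   For instance, continuing the session above, even the modern-looking `toLocalDate` goes through the deprecated hybrid-calendar getters, so I would expect (not an actual run):
   ```scala
   scala> ts.toLocalDate
   res3: java.time.LocalDate = 1100-10-03
   ```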
   
   In the proposed PR, I changed the way `java.sql.Timestamp`/`Date` are initialized: from a duration to a local date-time. So, if we have an instant that maps to 1582-01-01 in the Gregorian calendar, we take the local date and interpret it as the local date `1582-01-01` in the Julian calendar. In this way, date-time components returned by Spark SQL functions like `year()`, `month()`, etc. will equal the values returned by `java.sql.Date.getYear`, `java.sql.Date.getMonth`, etc.
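   
   A small sanity sketch of that agreement (hypothetical value; `Date.valueOf(String)` builds the date from local fields via the hybrid calendar, just as the new `toJavaDate` preserves local fields for pre-cutover days):
   ```scala
   import java.sql.Date
   
   val d = Date.valueOf("1582-01-01")
   // The deprecated getters now agree with what Spark SQL's year()/month()
   // report for the same value, because the local fields were preserved:
   assert(d.getYear + 1900 == 1582)
   assert(d.getMonth + 1 == 1)
   ```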
   
   Of course, the duration in `java.sql.Timestamp`/`java.sql.Date` returned by 
the `getTime()` method will be different from the values stored in Spark's 
TIMESTAMP/DATE columns.
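   
   For example (a sketch with my expected output; the 7-day offset for year 1100 follows from the `collect()` example above, and the exact millis depend on the JVM time zone):
   ```scala
   import java.sql.Date
   import java.time.{LocalDate, ZoneId}
   
   // Field-preserving java.sql.Date: hybrid (Julian) fields 1100-10-10.
   val d = Date.valueOf("1100-10-10")
   // Physical millis of the proleptic Gregorian 1100-10-10 that Spark stores.
   val catalystMillis = LocalDate.of(1100, 10, 10)
     .atStartOfDay(ZoneId.systemDefault())
     .toInstant.toEpochMilli
   // Julian 1100-10-10 falls 7 days after Gregorian 1100-10-10:
   println((d.getTime - catalystMillis) / 86400000L)  // expected: 7
   ```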
