This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 73c7b12  [SPARK-26546][SQL] Caching of 
java.time.format.DateTimeFormatter
73c7b12 is described below

commit 73c7b126c6f477b38eba98232f2c8389a68676b8
Author: Maxim Gekk <max.g...@gmail.com>
AuthorDate: Thu Jan 10 10:32:20 2019 +0800

    [SPARK-26546][SQL] Caching of java.time.format.DateTimeFormatter
    
    ## What changes were proposed in this pull request?
    
    Added a cache for `java.time.format.DateTimeFormatter` instances, keyed by
    pattern and locale. This should avoid re-parsing the timestamp/date pattern
    each time a new instance of `TimestampFormatter`/`DateFormatter` is created.
    
    ## How was this patch tested?
    
    By existing test suites `TimestampFormatterSuite`/`DateFormatterSuite` and 
`JsonFunctionsSuite`/`JsonSuite`.
    
    Closes #23462 from MaxGekk/time-formatter-caching.
    
    Lead-authored-by: Maxim Gekk <max.g...@gmail.com>
    Co-authored-by: Maxim Gekk <maxim.g...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../spark/sql/catalyst/util/DateFormatter.scala    |  2 +-
 .../catalyst/util/DateTimeFormatterHelper.scala    | 51 ++++++++++++++++------
 .../sql/catalyst/util/TimestampFormatter.scala     |  2 +-
 3 files changed, 40 insertions(+), 15 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index b4c9967..db92552 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -36,7 +36,7 @@ class Iso8601DateFormatter(
     locale: Locale) extends DateFormatter with DateTimeFormatterHelper {
 
   @transient
-  private lazy val formatter = buildFormatter(pattern, locale)
+  private lazy val formatter = getOrCreateFormatter(pattern, locale)
   private val UTC = ZoneId.of("UTC")
 
   private def toInstant(s: String): Instant = {
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index 91cc57e..81ad6ad 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -23,9 +23,46 @@ import java.time.format.{DateTimeFormatter, 
DateTimeFormatterBuilder, ResolverSt
 import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries}
 import java.util.Locale
 
+import com.google.common.cache.CacheBuilder
+
+import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._
+
 trait DateTimeFormatterHelper {
+  protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, 
zoneId: ZoneId): Instant = {
+    val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == 
null) {
+      LocalTime.ofNanoOfDay(0)
+    } else {
+      LocalTime.from(temporalAccessor)
+    }
+    val localDate = LocalDate.from(temporalAccessor)
+    val localDateTime = LocalDateTime.of(localDate, localTime)
+    val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
+    Instant.from(zonedDateTime)
+  }
+
+  // Gets a formatter from the cache or creates new one. The buildFormatter 
method can be called
+  // a few times with the same parameters in parallel if the cache does not 
contain values
+  // associated to those parameters. Since the formatter is immutable, it does 
not matter.
+  // In this way, synchronised is intentionally omitted in this method to make 
parallel calls
+  // less synchronised.
+  // The Cache.get method is not used here to avoid creation of additional 
instances of Callable.
+  protected def getOrCreateFormatter(pattern: String, locale: Locale): 
DateTimeFormatter = {
+    val key = (pattern, locale)
+    var formatter = cache.getIfPresent(key)
+    if (formatter == null) {
+      formatter = buildFormatter(pattern, locale)
+      cache.put(key, formatter)
+    }
+    formatter
+  }
+}
 
-  protected def buildFormatter(pattern: String, locale: Locale): 
DateTimeFormatter = {
+private object DateTimeFormatterHelper {
+  val cache = CacheBuilder.newBuilder()
+    .maximumSize(128)
+    .build[(String, Locale), DateTimeFormatter]()
+
+  def buildFormatter(pattern: String, locale: Locale): DateTimeFormatter = {
     new DateTimeFormatterBuilder()
       .parseCaseInsensitive()
       .appendPattern(pattern)
@@ -38,16 +75,4 @@ trait DateTimeFormatterHelper {
       .withChronology(IsoChronology.INSTANCE)
       .withResolverStyle(ResolverStyle.STRICT)
   }
-
-  protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, 
zoneId: ZoneId): Instant = {
-    val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == 
null) {
-      LocalTime.ofNanoOfDay(0)
-    } else {
-      LocalTime.from(temporalAccessor)
-    }
-    val localDate = LocalDate.from(temporalAccessor)
-    val localDateTime = LocalDateTime.of(localDate, localTime)
-    val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
-    Instant.from(zonedDateTime)
-  }
 }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index b67b2d7..8042099 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -51,7 +51,7 @@ class Iso8601TimestampFormatter(
     timeZone: TimeZone,
     locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper {
   @transient
-  private lazy val formatter = buildFormatter(pattern, locale)
+  private lazy val formatter = getOrCreateFormatter(pattern, locale)
 
   private def toInstant(s: String): Instant = {
     val temporalAccessor = formatter.parse(s)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to