This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 73c7b12 [SPARK-26546][SQL] Caching of java.time.format.DateTimeFormatter 73c7b12 is described below commit 73c7b126c6f477b38eba98232f2c8389a68676b8 Author: Maxim Gekk <max.g...@gmail.com> AuthorDate: Thu Jan 10 10:32:20 2019 +0800 [SPARK-26546][SQL] Caching of java.time.format.DateTimeFormatter ## What changes were proposed in this pull request? Added a cache for java.time.format.DateTimeFormatter instances with keys consisting of a pattern and a locale. This should make it possible to avoid parsing timestamp/date patterns each time a new instance of `TimestampFormatter`/`DateFormatter` is created. ## How was this patch tested? By existing test suites `TimestampFormatterSuite`/`DateFormatterSuite` and `JsonFunctionsSuite`/`JsonSuite`. Closes #23462 from MaxGekk/time-formatter-caching. Lead-authored-by: Maxim Gekk <max.g...@gmail.com> Co-authored-by: Maxim Gekk <maxim.g...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../spark/sql/catalyst/util/DateFormatter.scala | 2 +- .../catalyst/util/DateTimeFormatterHelper.scala | 51 ++++++++++++++++------ .../sql/catalyst/util/TimestampFormatter.scala | 2 +- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index b4c9967..db92552 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -36,7 +36,7 @@ class Iso8601DateFormatter( locale: Locale) extends DateFormatter with DateTimeFormatterHelper { @transient - private lazy val formatter = buildFormatter(pattern, locale) + private lazy val formatter = getOrCreateFormatter(pattern, locale) private val UTC = ZoneId.of("UTC") private def toInstant(s: String): Instant = { diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index 91cc57e..81ad6ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -23,9 +23,46 @@ import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverSt import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} import java.util.Locale +import com.google.common.cache.CacheBuilder + +import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._ + trait DateTimeFormatterHelper { + protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, zoneId: ZoneId): Instant = { + val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == null) { + LocalTime.ofNanoOfDay(0) + } else { + LocalTime.from(temporalAccessor) + } + val localDate = LocalDate.from(temporalAccessor) + val localDateTime = LocalDateTime.of(localDate, localTime) + val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId) + Instant.from(zonedDateTime) + } + + // Gets a formatter from the cache or creates new one. The buildFormatter method can be called + // a few times with the same parameters in parallel if the cache does not contain values + // associated to those parameters. Since the formatter is immutable, it does not matter. + // In this way, synchronised is intentionally omitted in this method to make parallel calls + // less synchronised. + // The Cache.get method is not used here to avoid creation of additional instances of Callable. 
+ protected def getOrCreateFormatter(pattern: String, locale: Locale): DateTimeFormatter = { + val key = (pattern, locale) + var formatter = cache.getIfPresent(key) + if (formatter == null) { + formatter = buildFormatter(pattern, locale) + cache.put(key, formatter) + } + formatter + } +} - protected def buildFormatter(pattern: String, locale: Locale): DateTimeFormatter = { +private object DateTimeFormatterHelper { + val cache = CacheBuilder.newBuilder() + .maximumSize(128) + .build[(String, Locale), DateTimeFormatter]() + + def buildFormatter(pattern: String, locale: Locale): DateTimeFormatter = { new DateTimeFormatterBuilder() .parseCaseInsensitive() .appendPattern(pattern) @@ -38,16 +75,4 @@ trait DateTimeFormatterHelper { .withChronology(IsoChronology.INSTANCE) .withResolverStyle(ResolverStyle.STRICT) } - - protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, zoneId: ZoneId): Instant = { - val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == null) { - LocalTime.ofNanoOfDay(0) - } else { - LocalTime.from(temporalAccessor) - } - val localDate = LocalDate.from(temporalAccessor) - val localDateTime = LocalDateTime.of(localDate, localTime) - val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId) - Instant.from(zonedDateTime) - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index b67b2d7..8042099 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -51,7 +51,7 @@ class Iso8601TimestampFormatter( timeZone: TimeZone, locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper { @transient - private lazy val formatter = buildFormatter(pattern, locale) + private lazy val formatter = getOrCreateFormatter(pattern, locale) private def 
toInstant(s: String): Instant = { val temporalAccessor = formatter.parse(s) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org