MaxGekk commented on code in PR #41078:
URL: https://github.com/apache/spark/pull/41078#discussion_r1192941467
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala:
##########
@@ -163,27 +165,63 @@ class Iso8601TimestampFormatter(
protected lazy val legacyFormatter = TimestampFormatter.getLegacyFormatter(
pattern, zoneId, locale, legacyFormat)
+ override def parseOptional(s: String): Option[Long] = {
+ try {
+ val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
+ if (parsed != null) {
+ Some(extractMicros(parsed))
+ } else {
+ None
+ }
+ } catch {
+ case NonFatal(_) => None
+ }
+ }
+
+ private def extractMicros(parsed: TemporalAccessor): Long = {
+ val parsedZoneId = parsed.query(TemporalQueries.zone())
+ val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
+ val zonedDateTime = toZonedDateTime(parsed, timeZoneId)
+ val epochSeconds = zonedDateTime.toEpochSecond
+ val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
+ Math.addExact(Math.multiplyExact(epochSeconds, MICROS_PER_SECOND),
microsOfSecond)
+ }
+
override def parse(s: String): Long = {
try {
val parsed = formatter.parse(s)
- val parsedZoneId = parsed.query(TemporalQueries.zone())
- val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
- val zonedDateTime = toZonedDateTime(parsed, timeZoneId)
- val epochSeconds = zonedDateTime.toEpochSecond
- val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
-
- Math.addExact(Math.multiplyExact(epochSeconds, MICROS_PER_SECOND),
microsOfSecond)
+ extractMicros(parsed)
} catch checkParsedDiff(s, legacyFormatter.parse)
}
+ override def parseWithoutTimeZoneOptional(s: String, allowTimeZone:
Boolean): Option[Long] = {
+ try {
+ val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
+ if (parsed != null) {
+ val (localDate, localTime) = extractDateAndTime(s, parsed,
allowTimeZone)
+ Some(DateTimeUtils.localDateTimeToMicros(LocalDateTime.of(localDate,
localTime)))
+ } else {
+ None
+ }
+ } catch {
+ case NonFatal(_) => None
+ }
+ }
+
+ private def extractDateAndTime(s: String, parsed: TemporalAccessor,
allowTimeZone: Boolean):
+ (LocalDate, LocalTime) = {
+ if (!allowTimeZone && parsed.query(TemporalQueries.zone()) != null) {
+ throw QueryExecutionErrors.cannotParseStringAsDataTypeError(pattern, s,
TimestampNTZType)
+ }
+ val localDate = toLocalDate(parsed)
+ val localTime = toLocalTime(parsed)
+ (localDate, localTime)
+ }
Review Comment:
Let's deduplicate the code, and put one more common line here:
```suggestion
private def extractMicrosNTZ(
s: String,
parsed: TemporalAccessor,
allowTimeZone: Boolean): Long = {
if (!allowTimeZone && parsed.query(TemporalQueries.zone()) != null) {
throw QueryExecutionErrors.cannotParseStringAsDataTypeError(pattern,
s, TimestampNTZType)
}
val localDate = toLocalDate(parsed)
val localTime = toLocalTime(parsed)
DateTimeUtils.localDateTimeToMicros(LocalDateTime.of(localDate,
localTime))
}
```
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala:
##########
@@ -472,4 +472,24 @@ class TimestampFormatterSuite extends
DatetimeFormatterSuite {
assert(
formatter.parseWithoutTimeZoneOptional("abc", false).isEmpty)
}
+
+ test("SPARK-39280: support returning optional parse results in the iso8601 formatter") {
+ val formatter = new Iso8601TimestampFormatter(
+ "yyyy-MM-dd HH:mm:ss.SSSS",
+ locale = DateFormatter.defaultLocale,
+ legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT,
+ isParsing = true, zoneId = DateTimeTestUtils.LA)
+ assert(formatter.parseOptional("9999-12-31 23:59:59.9990").contains(253402329599999000L))
+ assert(
+ formatter.parseWithoutTimeZoneOptional("9999-12-31 23:59:59.9990", false)
+ .contains(253402300799999000L))
Review Comment:
```suggestion
assert(formatter.parseWithoutTimeZoneOptional("9999-12-31 23:59:59.9990", false)
.contains(253402300799999000L))
```
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala:
##########
@@ -472,4 +472,24 @@ class TimestampFormatterSuite extends
DatetimeFormatterSuite {
assert(
formatter.parseWithoutTimeZoneOptional("abc", false).isEmpty)
}
+
+ test("SPARK-39280: support returning optional parse results in the iso8601 formatter") {
+ val formatter = new Iso8601TimestampFormatter(
+ "yyyy-MM-dd HH:mm:ss.SSSS",
+ locale = DateFormatter.defaultLocale,
+ legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT,
+ isParsing = true, zoneId = DateTimeTestUtils.LA)
+ assert(formatter.parseOptional("9999-12-31 23:59:59.9990").contains(253402329599999000L))
+ assert(
+ formatter.parseWithoutTimeZoneOptional("9999-12-31 23:59:59.9990", false)
+ .contains(253402300799999000L))
+ assert(formatter.parseOptional("abc").isEmpty)
+ assert(
+ formatter.parseWithoutTimeZoneOptional("abc", false).isEmpty)
Review Comment:
```suggestion
assert(formatter.parseWithoutTimeZoneOptional("abc", false).isEmpty)
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala:
##########
@@ -163,27 +165,63 @@ class Iso8601TimestampFormatter(
protected lazy val legacyFormatter = TimestampFormatter.getLegacyFormatter(
pattern, zoneId, locale, legacyFormat)
+ override def parseOptional(s: String): Option[Long] = {
+ try {
+ val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
+ if (parsed != null) {
+ Some(extractMicros(parsed))
+ } else {
+ None
+ }
+ } catch {
+ case NonFatal(_) => None
+ }
+ }
+
+ private def extractMicros(parsed: TemporalAccessor): Long = {
+ val parsedZoneId = parsed.query(TemporalQueries.zone())
+ val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
+ val zonedDateTime = toZonedDateTime(parsed, timeZoneId)
+ val epochSeconds = zonedDateTime.toEpochSecond
+ val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
+ Math.addExact(Math.multiplyExact(epochSeconds, MICROS_PER_SECOND),
microsOfSecond)
+ }
+
override def parse(s: String): Long = {
try {
val parsed = formatter.parse(s)
- val parsedZoneId = parsed.query(TemporalQueries.zone())
- val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
- val zonedDateTime = toZonedDateTime(parsed, timeZoneId)
- val epochSeconds = zonedDateTime.toEpochSecond
- val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
-
- Math.addExact(Math.multiplyExact(epochSeconds, MICROS_PER_SECOND),
microsOfSecond)
+ extractMicros(parsed)
} catch checkParsedDiff(s, legacyFormatter.parse)
}
+ override def parseWithoutTimeZoneOptional(s: String, allowTimeZone:
Boolean): Option[Long] = {
+ try {
+ val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
+ if (parsed != null) {
+ val (localDate, localTime) = extractDateAndTime(s, parsed,
allowTimeZone)
+ Some(DateTimeUtils.localDateTimeToMicros(LocalDateTime.of(localDate,
localTime)))
Review Comment:
```suggestion
Some(extractMicrosNTZ(s, parsed, allowTimeZone))
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala:
##########
@@ -163,27 +165,63 @@ class Iso8601TimestampFormatter(
protected lazy val legacyFormatter = TimestampFormatter.getLegacyFormatter(
pattern, zoneId, locale, legacyFormat)
+ override def parseOptional(s: String): Option[Long] = {
+ try {
+ val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
+ if (parsed != null) {
+ Some(extractMicros(parsed))
+ } else {
+ None
+ }
+ } catch {
+ case NonFatal(_) => None
+ }
+ }
+
+ private def extractMicros(parsed: TemporalAccessor): Long = {
+ val parsedZoneId = parsed.query(TemporalQueries.zone())
+ val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
+ val zonedDateTime = toZonedDateTime(parsed, timeZoneId)
+ val epochSeconds = zonedDateTime.toEpochSecond
+ val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
+ Math.addExact(Math.multiplyExact(epochSeconds, MICROS_PER_SECOND),
microsOfSecond)
+ }
+
override def parse(s: String): Long = {
try {
val parsed = formatter.parse(s)
- val parsedZoneId = parsed.query(TemporalQueries.zone())
- val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
- val zonedDateTime = toZonedDateTime(parsed, timeZoneId)
- val epochSeconds = zonedDateTime.toEpochSecond
- val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
-
- Math.addExact(Math.multiplyExact(epochSeconds, MICROS_PER_SECOND),
microsOfSecond)
+ extractMicros(parsed)
} catch checkParsedDiff(s, legacyFormatter.parse)
}
+ override def parseWithoutTimeZoneOptional(s: String, allowTimeZone:
Boolean): Option[Long] = {
+ try {
+ val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
+ if (parsed != null) {
+ val (localDate, localTime) = extractDateAndTime(s, parsed,
allowTimeZone)
+ Some(DateTimeUtils.localDateTimeToMicros(LocalDateTime.of(localDate,
localTime)))
+ } else {
+ None
+ }
+ } catch {
+ case NonFatal(_) => None
+ }
+ }
+
+ private def extractDateAndTime(s: String, parsed: TemporalAccessor,
allowTimeZone: Boolean):
+ (LocalDate, LocalTime) = {
+ if (!allowTimeZone && parsed.query(TemporalQueries.zone()) != null) {
+ throw QueryExecutionErrors.cannotParseStringAsDataTypeError(pattern, s,
TimestampNTZType)
+ }
+ val localDate = toLocalDate(parsed)
+ val localTime = toLocalTime(parsed)
+ (localDate, localTime)
+ }
+
override def parseWithoutTimeZone(s: String, allowTimeZone: Boolean): Long =
{
try {
val parsed = formatter.parse(s)
- if (!allowTimeZone && parsed.query(TemporalQueries.zone()) != null) {
- throw QueryExecutionErrors.cannotParseStringAsDataTypeError(pattern,
s, TimestampNTZType)
- }
- val localDate = toLocalDate(parsed)
- val localTime = toLocalTime(parsed)
+ val (localDate, localTime) = extractDateAndTime(s, parsed, allowTimeZone)
DateTimeUtils.localDateTimeToMicros(LocalDateTime.of(localDate,
localTime))
Review Comment:
Let's move the common line to `extractMicrosNTZ()`:
```scala
extractMicrosNTZ(s, parsed, allowTimeZone)
```
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala:
##########
@@ -472,4 +472,24 @@ class TimestampFormatterSuite extends
DatetimeFormatterSuite {
assert(
formatter.parseWithoutTimeZoneOptional("abc", false).isEmpty)
}
+
+ test("SPARK-39280: support returning optional parse results in the iso8601 formatter") {
+ val formatter = new Iso8601TimestampFormatter(
+ "yyyy-MM-dd HH:mm:ss.SSSS",
+ locale = DateFormatter.defaultLocale,
+ legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT,
+ isParsing = true, zoneId = DateTimeTestUtils.LA)
+ assert(formatter.parseOptional("9999-12-31 23:59:59.9990").contains(253402329599999000L))
+ assert(
+ formatter.parseWithoutTimeZoneOptional("9999-12-31 23:59:59.9990", false)
+ .contains(253402300799999000L))
+ assert(formatter.parseOptional("abc").isEmpty)
+ assert(
+ formatter.parseWithoutTimeZoneOptional("abc", false).isEmpty)
+
+ assert(formatter.parseOptional("2012-00-65 23:59:59.9990").isEmpty)
+ assert(
+ formatter.parseWithoutTimeZoneOptional("2012-00-65 23:59:59.9990", false)
+ .isEmpty)
Review Comment:
```suggestion
assert(formatter.parseWithoutTimeZoneOptional("2012-00-65 23:59:59.9990", false).isEmpty)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]