This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 34db5f5 [SPARK-26618][SQL] Make typed Timestamp/Date literals consistent to casting 34db5f5 is described below commit 34db5f565271ad9d9d5134a4e914c10f1677a38a Author: Maxim Gekk <maxim.g...@databricks.com> AuthorDate: Fri Jan 18 12:47:36 2019 +0100 [SPARK-26618][SQL] Make typed Timestamp/Date literals consistent to casting ## What changes were proposed in this pull request? In this PR, I propose to make the creation of typed `TIMESTAMP` and `DATE` literals consistent with the `Cast` expression. More precisely, the type constructors reuse the string parsing of the `Cast` expression. This makes it possible: - to use the same calendar in the parsing methods - to support the same set of timestamp/date patterns For example, creating a timestamp literal: ```sql SELECT TIMESTAMP '2019-01-14 20:54:00.000' ``` behaves the same way as casting the string literal: ```sql SELECT CAST('2019-01-14 20:54:00.000' AS TIMESTAMP) ``` ## How was this patch tested? This was tested by `SQLQueryTestSuite` as well as `ExpressionParserSuite`. Closes #23541 from MaxGekk/timestamp-date-constructors. 
Lead-authored-by: Maxim Gekk <maxim.g...@databricks.com> Co-authored-by: Maxim Gekk <max.g...@gmail.com> Signed-off-by: Herman van Hovell <hvanhov...@databricks.com> --- .../spark/sql/catalyst/parser/AstBuilder.scala | 15 +++++--- .../catalyst/parser/ExpressionParserSuite.scala | 42 ++++++++++++++++++++++ .../resources/sql-tests/results/literals.sql.out | 4 +-- 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a27c6d3..24bbe11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.parser -import java.sql.{Date, Timestamp} import java.util.Locale import javax.xml.bind.DatatypeConverter @@ -37,9 +36,10 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last} import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getTimeZone, stringToDate, stringToTimestamp} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} import org.apache.spark.util.random.RandomSampler /** @@ -1552,12 +1552,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging override def visitTypeConstructor(ctx: TypeConstructorContext): Literal = withOrigin(ctx) { val value = string(ctx.STRING) val valueType = ctx.identifier.getText.toUpperCase(Locale.ROOT) + def toLiteral[T](f: UTF8String => Option[T], t: DataType): Literal = { + f(UTF8String.fromString(value)).map(Literal(_, 
t)).getOrElse { + throw new ParseException(s"Cannot parse the $valueType value: $value", ctx) + } + } try { valueType match { - case "DATE" => - Literal(Date.valueOf(value)) + case "DATE" => toLiteral(stringToDate, DateType) case "TIMESTAMP" => - Literal(Timestamp.valueOf(value)) + val timeZone = getTimeZone(SQLConf.get.sessionLocalTimeZone) + toLiteral(stringToTimestamp(_, timeZone), TimestampType) case "X" => val padding = if (value.length % 2 != 0) "0" else "" Literal(DatatypeConverter.parseHexBinary(padding + value)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 8bcc69d..7541d9d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -17,12 +17,15 @@ package org.apache.spark.sql.catalyst.parser import java.sql.{Date, Timestamp} +import java.time.LocalDateTime +import java.util.concurrent.TimeUnit import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last} import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval @@ -56,6 +59,13 @@ class ExpressionParserSuite extends PlanTest { } } + def assertEval( + sqlCommand: String, + expect: Any, + parser: ParserInterface = defaultParser): Unit = { + assert(parser.parseExpression(sqlCommand).eval() === expect) + } + test("star expressions") { // Global Star assertEqual("*", UnresolvedStar(None)) @@ -680,4 +690,36 @@ class 
ExpressionParserSuite extends PlanTest { assertEqual("last(a ignore nulls)", Last('a, Literal(true)).toAggregateExpression()) assertEqual("last(a)", Last('a, Literal(false)).toAggregateExpression()) } + + test("timestamp literals") { + DateTimeTestUtils.outstandingTimezones.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone.getID) { + def toMicros(time: LocalDateTime): Long = { + val seconds = time.atZone(timeZone.toZoneId).toInstant.getEpochSecond + TimeUnit.SECONDS.toMicros(seconds) + } + assertEval( + sqlCommand = "TIMESTAMP '2019-01-14 20:54:00.000'", + expect = toMicros(LocalDateTime.of(2019, 1, 14, 20, 54))) + assertEval( + sqlCommand = "Timestamp '2000-01-01T00:55:00'", + expect = toMicros(LocalDateTime.of(2000, 1, 1, 0, 55))) + // Parsing of the string does not depend on the SQL config because the string contains + // time zone offset already. + assertEval( + sqlCommand = "TIMESTAMP '2019-01-16 20:50:00.567000+01:00'", + expect = 1547668200567000L) + } + } + } + + test("date literals") { + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + assertEval("DATE '2019-01-14'", 17910) + assertEval("DATE '2019-01'", 17897) + assertEval("DATE '2019'", 17897) + } + } + } } diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index 7f30161..8d8decb 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -291,7 +291,7 @@ struct<> -- !query 31 output org.apache.spark.sql.catalyst.parser.ParseException -Exception parsing DATE(line 1, pos 7) +Cannot parse the DATE value: mar 11 2016(line 1, pos 7) == SQL == select date 'mar 11 2016' @@ -313,7 +313,7 @@ struct<> -- !query 33 output org.apache.spark.sql.catalyst.parser.ParseException -Timestamp format must be yyyy-mm-dd 
hh:mm:ss[.fffffffff](line 1, pos 7) +Cannot parse the TIMESTAMP value: 2016-33-11 20:54:00.000(line 1, pos 7) == SQL == select timestamp '2016-33-11 20:54:00.000' --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org