This is an automated email from the ASF dual-hosted git repository. jhyde pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
commit 4fbcbf42443745988f2ba2555bf846372e17330d Author: Julian Hyde <[email protected]> AuthorDate: Fri Dec 9 01:54:54 2022 -0800 [CALCITE-5508] Add constructor functions for DATE, TIME, TIMESTAMP, DATETIME (enabled in BigQuery library) New DATE functions such as DATE(int, int, int) needs to work alongside the existing DATE(string) function; all are validated usig the type operand checker, but after validation DATE(string) is desugared to CAST(string AS DATE). Following [CALCITE-5424] Customize handling of literals based on type system which deferred validation of DATE, TIME, TIMESTAMP literals, we now also allow DATETIME literal. (In BigQuery mode it is mapped to a literal in Calcite's TIMESTAMP type.) Also includes a fix for [CALCITE-5498] BigQuery TIMESTAMP() function short notation for timezone offsets isn’t supported in Java 8 In a few places I had to change test output. I removed "UTC" from test output and added TODO comments; these changes should reverted when [CALCITE-5446] Support TIMESTAMP WITH LOCAL TIME ZONE type in JDBC driver is fixed. And I manually added the offset to some TIMESTAMP WITH LOCAL TIME ZONE LITERALS (which were written as TIMESTAMP because big-query.iq uses BigQuery's type aliasing); these changes can be reverted when [CALCITE-5496] Support time zones when parsing TIMESTAMP WITH LOCAL TIME ZONE is fixed. Close apache/calcite#3023 --- babel/src/main/codegen/config.fmpp | 3 +- babel/src/main/codegen/includes/parserImpls.ftl | 20 -- .../org/apache/calcite/test/BabelQuidemTest.java | 8 + babel/src/test/resources/sql/big-query.iq | 286 +++++++++++++++++---- core/src/main/codegen/default_config.fmpp | 2 +- core/src/main/codegen/templates/Parser.jj | 25 ++ .../calcite/adapter/enumerable/RexImpTable.java | 9 +- .../org/apache/calcite/runtime/SqlFunctions.java | 246 ++++++++++++++++++ .../java/org/apache/calcite/sql/SqlLiteral.java | 2 +- .../calcite/sql/fun/SqlDatetimePlusOperator.java | 26 +- .../calcite/sql/fun/SqlLibraryOperators.java | 112 +++++++- .../calcite/sql/fun/SqlTimestampAddFunction.java | 55 ++-- .../org/apache/calcite/sql/type/OperandTypes.java | 62 +++++ .../org/apache/calcite/sql/type/ReturnTypes.java | 7 + .../org/apache/calcite/sql/type/SqlTypeName.java | 4 +- .../calcite/sql2rel/SqlNodeToRexConverterImpl.java | 4 + .../calcite/sql2rel/StandardConvertletTable.java | 19 +- .../calcite/adapter/innodb/InnodbEnumerator.java | 1 + site/_docs/reference.md | 29 +++ .../apache/calcite/test/ConnectionFactories.java | 26 ++ .../org/apache/calcite/test/SqlOperatorTest.java | 8 +- 21 files changed, 828 insertions(+), 126 deletions(-) diff --git a/babel/src/main/codegen/config.fmpp b/babel/src/main/codegen/config.fmpp index cd7e010180..972cb47b8d 100644 --- a/babel/src/main/codegen/config.fmpp +++ b/babel/src/main/codegen/config.fmpp @@ -532,9 +532,8 @@ data: { # List of methods for parsing builtin function calls. # Return type of method implementation should be "SqlNode". - # Example: "DateFunctionCall()". + # Example: "DateaddFunctionCall()". builtinFunctionCallMethods: [ - "DateFunctionCall()" "DateaddFunctionCall()" ] diff --git a/babel/src/main/codegen/includes/parserImpls.ftl b/babel/src/main/codegen/includes/parserImpls.ftl index 5a598fc29c..fb169fe1e8 100644 --- a/babel/src/main/codegen/includes/parserImpls.ftl +++ b/babel/src/main/codegen/includes/parserImpls.ftl @@ -22,26 +22,6 @@ JoinType LeftSemiJoin() : <LEFT> <SEMI> <JOIN> { return JoinType.LEFT_SEMI_JOIN; } } -SqlNode DateFunctionCall() : -{ - final SqlFunctionCategory funcType = SqlFunctionCategory.USER_DEFINED_FUNCTION; - final SqlIdentifier qualifiedName; - final Span s; - final SqlLiteral quantifier; - final List<? extends SqlNode> args; -} -{ - <DATE> { - s = span(); - qualifiedName = new SqlIdentifier(unquotedIdentifier(), getPos()); - } - args = FunctionParameterList(ExprContext.ACCEPT_SUB_QUERY) { - quantifier = (SqlLiteral) args.get(0); - args.remove(0); - return createCall(qualifiedName, s.end(this), funcType, quantifier, args); - } -} - SqlNode DateaddFunctionCall() : { final Span s; diff --git a/babel/src/test/java/org/apache/calcite/test/BabelQuidemTest.java b/babel/src/test/java/org/apache/calcite/test/BabelQuidemTest.java index 5aba653855..69bdbaf1e1 100644 --- a/babel/src/test/java/org/apache/calcite/test/BabelQuidemTest.java +++ b/babel/src/test/java/org/apache/calcite/test/BabelQuidemTest.java @@ -27,6 +27,7 @@ import org.apache.calcite.sql.SqlWriter; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl; import org.apache.calcite.sql.pretty.SqlPrettyWriter; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.validate.SqlConformanceEnum; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Planner; @@ -112,6 +113,13 @@ class BabelQuidemTest extends QuidemTest { .with(CalciteConnectionProperty.CONFORMANCE, SqlConformanceEnum.BABEL) .with(CalciteConnectionProperty.LENIENT_OPERATOR_LOOKUP, true) + .with( + ConnectionFactories.addType("DATETIME", typeFactory -> + typeFactory.createSqlType(SqlTypeName.TIMESTAMP))) + .with( + ConnectionFactories.addType("TIMESTAMP", typeFactory -> + typeFactory.createSqlType( + SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE))) .connect(); case "scott-postgresql": return CalciteAssert.that() diff --git a/babel/src/test/resources/sql/big-query.iq b/babel/src/test/resources/sql/big-query.iq index 20107afe66..9172488d33 100755 --- a/babel/src/test/resources/sql/big-query.iq +++ b/babel/src/test/resources/sql/big-query.iq @@ -15,6 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +# In this file (due to the use of 'scott-big-query' below, and unlike typical +# Quidem tests), types are aliased to be consistent with BigQuery's type names, +# not Calcite's usual type names. # In particular: +# * DATETIME is what Calcite calls TIMESTAMP; +# * TIMESTAMP is what Calcite calls TIMESTAMP WITH LOCAL TIME ZONE; +# * DATE and TIME are the same as Calcite; +# * BYTES means VARBINARY(*); +# * STRING means VARCHAR(*). +# +# The DATETIME() and TIMESTAMP() functions are also substituted so that they +# produce values that BigQuery would call DATETIME and TIMESTAMP. +# !use scott-big-query !set outputformat mysql @@ -349,15 +361,15 @@ SELECT current_timestamp() AS now, t.current_timestamp FROM t; # In the following example, EXTRACT returns a value corresponding to # the DAY date part. -!if (false) { SELECT EXTRACT(DAY FROM DATE '2013-12-25') AS the_day; +---------+ | the_day | +---------+ -| 25 | +| 25 | +---------+ +(1 row) + !ok -!} # In the following example, EXTRACT returns values corresponding to # different date parts from a column of dates near the end of the @@ -647,6 +659,8 @@ SELECT LENGTH("hello") as length; ##################################################################### # DATE # +# 0. DATE(string) +# Shorthand for 'CAST(string AS DATE)' # 1. DATE(year, month, day) # Constructs a DATE from INT64 values representing the year, month, # and day. @@ -659,19 +673,88 @@ SELECT LENGTH("hello") as length; # # Returns DATE -!if (false) { +# 'date(x) is shorthand for 'cast(x as date)' +select date('1970-01-01') as d; ++------------+ +| d | ++------------+ +| 1970-01-01 | ++------------+ +(1 row) + +!ok + +select date(cast(null as varchar(10))) as d; ++---+ +| d | ++---+ +| | ++---+ +(1 row) + +!ok + SELECT DATE(2016, 12, 25) AS date_ymd, DATE(DATETIME "2016-12-25 23:59:59") AS date_dt, - DATE(TIMESTAMP "2016-12-25 05:30:00+07", "America/Los_Angeles") AS date_tstz; + DATE(TIMESTAMP "2016-12-24 22:30:00" /* TODO should be "2016-12-25 05:30:00+07" */ , "America/Los_Angeles") AS date_tstz; +------------+------------+------------+ | date_ymd | date_dt | date_tstz | +------------+------------+------------+ | 2016-12-25 | 2016-12-25 | 2016-12-24 | +------------+------------+------------+ +(1 row) + !ok -!} +select date(2022, 11, 15) as d1, + date(datetime "2008-01-01 01:03:05") as d2, + date(datetime(2008, 1, 1, 1, 3, 5)) as d3; ++------------+------------+------------+ +| d1 | d2 | d3 | ++------------+------------+------------+ +| 2022-11-15 | 2008-01-01 | 2008-01-01 | ++------------+------------+------------+ +(1 row) + +!ok + +# Test timezone conversion when converting TIMESTAMP to DATE. +# Denver observes DST whereas Phoenix does not. +# Both cities have a -07:00 offset in winter, but Denver has -06:00 in summer. +select date(timestamp("2008-06-21 06:30:00")) as sum_utc, + date(timestamp("2008-06-21 06:30:00"), "America/Denver") as sum_dst, + date(timestamp("2008-06-21 06:30:00"), "America/Phoenix") as sum_std, + date(timestamp("2008-12-21 06:30:00")) as win_utc, + date(timestamp("2008-12-21 06:30:00"), "America/Denver") as win_dst, + date(timestamp("2008-12-21 06:30:00"), "America/Phoenix") as win_std; ++------------+------------+------------+------------+------------+------------+ +| sum_utc | sum_dst | sum_std | win_utc | win_dst | win_std | ++------------+------------+------------+------------+------------+------------+ +| 2008-06-21 | 2008-06-21 | 2008-06-20 | 2008-12-21 | 2008-12-20 | 2008-12-20 | ++------------+------------+------------+------------+------------+------------+ +(1 row) + +!ok + +# Date-time literals. +# As for other tests in this script, data type semantics match BigQuery: +# DATETIME is what Calcite calls TIMESTAMP; +# TIMESTAMP is what Calcite calls TIMESTAMP WITH LOCAL TIME ZONE; +# DATE and TIME are the same as Calcite. +SELECT + DATETIME "1969-07-21 02:56:00" AS dt, + TIMESTAMP "1969-07-21 02:56:00" AS ts, + DATE "1969-07-21" AS d, + TIME "02:56:00" AS t; ++---------------------+---------------------+------------+----------+ +| dt | ts | d | t | ++---------------------+---------------------+------------+----------+ +| 1969-07-21 02:56:00 | 1969-07-21 02:56:00 | 1969-07-21 | 02:56:00 | ++---------------------+---------------------+------------+----------+ +(1 row) + +!ok ##################################################################### # DATETIME @@ -689,17 +772,42 @@ SELECT # # Returns DATETIME -!if (false) { SELECT DATETIME(2008, 12, 25, 05, 30, 00) as datetime_ymdhms, - DATETIME(TIMESTAMP "2008-12-25 05:30:00+00", "America/Los_Angeles") as datetime_tstz; + DATETIME(TIMESTAMP "2008-12-25 05:30:00" /* TODO should be "2008-12-25 05:30:00+00" */, "America/Los_Angeles") as datetime_tstz; +---------------------+---------------------+ | datetime_ymdhms | datetime_tstz | +---------------------+---------------------+ -| 2008-12-25T05:30:00 | 2008-12-24T21:30:00 | +| 2008-12-25 05:30:00 | 2008-12-24 21:30:00 | +---------------------+---------------------+ +(1 row) + +!ok + +select datetime(2003, 05, 25, 12, 30, 59) as dt1, + datetime(date(2003, 05, 25)) as d2, + datetime(date(2003, 05, 25), time(12, 30, 59)) as d3; ++---------------------+---------------------+---------------------+ +| dt1 | d2 | d3 | ++---------------------+---------------------+---------------------+ +| 2003-05-25 12:30:59 | 2003-05-25 00:00:00 | 2003-05-25 12:30:59 | ++---------------------+---------------------+---------------------+ +(1 row) + +!ok + +# This tests time zone conversion from a timestamp. +select datetime(timestamp "2008-01-01 01:03:05") as t_winter, + datetime(timestamp "2008-01-01 01:03:05", "America/Los_Angeles") as t_winter_offs, + datetime(timestamp "2008-07-01 01:03:05", "America/Los_Angeles") as t_summer_offs; ++---------------------+---------------------+---------------------+ +| t_winter | t_winter_offs | t_summer_offs | ++---------------------+---------------------+---------------------+ +| 2008-01-01 01:03:05 | 2007-12-31 17:03:05 | 2008-06-30 18:03:05 | ++---------------------+---------------------+---------------------+ +(1 row) + !ok -!} ##################################################################### # TIME @@ -716,20 +824,19 @@ SELECT # # Returns TIME -!if (false) { SELECT TIME(15, 30, 00) as time_hms, - TIME(TIMESTAMP "2008-12-25 15:30:00+08", "America/Los_Angeles") as time_tstz; + TIME(TIMESTAMP "2008-12-25 07:30:00" /* TODO should be "2008-12-25 15:30:00+08" */, "America/Los_Angeles") as time_tstz; +----------+-----------+ | time_hms | time_tstz | +----------+-----------+ | 15:30:00 | 23:30:00 | +----------+-----------+ +(1 row) + !ok -!} -!if (false) { SELECT TIME(DATETIME "2008-12-25 15:30:00.000000") AS time_dt; +----------+ @@ -737,8 +844,57 @@ SELECT TIME(DATETIME "2008-12-25 15:30:00.000000") AS time_dt; +----------+ | 15:30:00 | +----------+ +(1 row) + +!ok + +select time(12, 30, 59) as t1, + time(datetime(2008, 01, 01, 12, 30, 59)) as t2, + time(datetime(2008, 07, 01, 12, 30, 59)) as t3; ++----------+----------+----------+ +| t1 | t2 | t3 | ++----------+----------+----------+ +| 12:30:59 | 12:30:59 | 12:30:59 | ++----------+----------+----------+ +(1 row) + +!ok + +# Test timezone conversion when converting TIMESTAMP to TIME. +# Denver observes DST whereas Phoenix does not. +# Both cities have a -07:00 offset in winter, but Denver has -06:00 in summer. +select time(timestamp("2008-06-21 06:30:00")) as sum_utc, + time(timestamp("2008-06-21 06:30:00"), "America/Denver") as sum_dst, + time(timestamp("2008-06-21 06:30:00"), "America/Phoenix") as sum_std, + time(timestamp("2008-12-21 06:30:00")) as win_utc, + time(timestamp("2008-12-21 06:30:00"), "America/Denver") as win_dst, + time(timestamp("2008-12-21 06:30:00"), "America/Phoenix") as win_std; ++----------+----------+----------+----------+----------+----------+ +| sum_utc | sum_dst | sum_std | win_utc | win_dst | win_std | ++----------+----------+----------+----------+----------+----------+ +| 06:30:00 | 00:30:00 | 23:30:00 | 06:30:00 | 23:30:00 | 23:30:00 | ++----------+----------+----------+----------+----------+----------+ +(1 row) + +!ok + +# Test timezone conversion when converting DATE to TIMESTAMP. +# Denver observes DST whereas Phoenix does not. +# Both cities have a -07:00 offset in winter, but Denver has -06:00 in summer. +select timestamp(date(2008, 6, 21)) as sum_utc, + timestamp(date(2008, 6, 21), "America/Denver") as sum_dst, + timestamp(date(2008, 6, 21), "America/Phoenix") as sum_std, + timestamp(date(2008, 12, 21)) as win_utc, + timestamp(date(2008, 12, 21), "America/Denver") as win_dst, + timestamp(date(2008, 12, 21), "America/Phoenix") as win_std; ++---------------------+---------------------+---------------------+---------------------+---------------------+---------------------+ +| sum_utc | sum_dst | sum_std | win_utc | win_dst | win_std | ++---------------------+---------------------+---------------------+---------------------+---------------------+---------------------+ +| 2008-06-21 00:00:00 | 2008-06-21 06:00:00 | 2008-06-21 07:00:00 | 2008-12-21 00:00:00 | 2008-12-21 07:00:00 | 2008-12-21 07:00:00 | ++---------------------+---------------------+---------------------+---------------------+---------------------+---------------------+ +(1 row) + !ok -!} ##################################################################### # TIMESTAMP @@ -795,27 +951,74 @@ SELECT TIMESTAMP("2008-12-25 15:30:00 UTC") AS timestamp_str; # Display of results may differ, depending upon the environment and # time zone where this query was executed. -!if (false) { +# +# After [CALCITE-5446] is fixed, this and other TIMESTAMP WITH LOCAL TIME +# values will end in UTC, like this: 2008-12-25 15:30:00 UTC. +# SELECT TIMESTAMP(DATETIME "2008-12-25 15:30:00") AS timestamp_datetime; -+-------------------------+ -| timestamp_datetime | -+-------------------------+ -| 2008-12-25 15:30:00 UTC | -+-------------------------+ ++---------------------+ +| timestamp_datetime | ++---------------------+ +| 2008-12-25 15:30:00 | ++---------------------+ +(1 row) + !ok -!} # Display of results may differ, depending upon the environment and # time zone where this query was executed. -!if (false) { SELECT TIMESTAMP(DATE "2008-12-25") AS timestamp_date; -+-------------------------+ -| timestamp_date | -+-------------------------+ -| 2008-12-25 00:00:00 UTC | -+-------------------------+ ++---------------------+ +| timestamp_date | ++---------------------+ +| 2008-12-25 00:00:00 | ++---------------------+ +(1 row) + +!ok + +# All these timestamps should be equal. +# This tests the BQ timestamp literal string formatter +# (optional 'T', optional leading zeros, optional offset with conversion). +select timestamp("2008-01-01 01:03:05+00") as t_space, + timestamp("2008-01-01T01:03:05+00") as t_iso, + timestamp("2008-01-01 01:03:05") as t_no_offset, + timestamp("2008-1-1 3:5:7+02:02:02") as t_offset; ++---------------------+---------------------+---------------------+---------------------+ +| t_space | t_iso | t_no_offset | t_offset | ++---------------------+---------------------+---------------------+---------------------+ +| 2008-01-01 01:03:05 | 2008-01-01 01:03:05 | 2008-01-01 01:03:05 | 2008-01-01 01:03:05 | ++---------------------+---------------------+---------------------+---------------------+ +(1 row) + +!ok + +# Negative and positive offsets. +select timestamp("2008-01-01 01:03:05+01") as space_01, + timestamp("2008-01-01T01:03:05+01") as iso_1, + timestamp("2008-01-01T01:03:05+01:00") as iso_1_00, + timestamp("2008-01-01T01:03:05-11") as iso_neg_11; ++---------------------+---------------------+---------------------+---------------------+ +| space_01 | iso_1 | iso_1_00 | iso_neg_11 | ++---------------------+---------------------+---------------------+---------------------+ +| 2008-01-01 00:03:05 | 2008-01-01 00:03:05 | 2008-01-01 00:03:05 | 2008-01-01 12:03:05 | ++---------------------+---------------------+---------------------+---------------------+ +(1 row) + +!ok + +# This tests time zone conversion from a datetime. +select timestamp(datetime "2008-01-01 01:03:05") as t_winter, + timestamp(datetime "2008-01-01 01:03:05", "America/Los_Angeles") as t_winter_offs, + timestamp(datetime "2008-07-01 01:03:05", "America/Los_Angeles") as t_summer_offs; ++---------------------+---------------------+---------------------+ +| t_winter | t_winter_offs | t_summer_offs | ++---------------------+---------------------+---------------------+ +| 2008-01-01 01:03:05 | 2008-01-01 09:03:05 | 2008-07-01 08:03:05 | ++---------------------+---------------------+---------------------+ +(1 row) + !ok -!} ##################################################################### # TIMESTAMP_SECONDS @@ -1102,7 +1305,7 @@ order by v; !ok -select unix_date(timestamp '2008-12-25') as d; +select unix_date(datetime '2008-12-25') as d; +-------+ | d | +-------+ @@ -1112,29 +1315,6 @@ select unix_date(timestamp '2008-12-25') as d; !ok -# DATE -# 'date(x) is shorthand for 'cast(x as date)' -select date('1970-01-01') as d; -+------------+ -| d | -+------------+ -| 1970-01-01 | -+------------+ -(1 row) - -!ok - -!if (false) { -select date(cast(null as varchar(10))) as d; -+---+ -| D | -+---+ -| | -+---+ -(1 row) - -!ok -!} ##################################################################### # DATE_ADD diff --git a/core/src/main/codegen/default_config.fmpp b/core/src/main/codegen/default_config.fmpp index c1847a2541..a1ff7d0549 100644 --- a/core/src/main/codegen/default_config.fmpp +++ b/core/src/main/codegen/default_config.fmpp @@ -395,7 +395,7 @@ parser: { # List of methods for parsing builtin function calls. # Return type of method implementation should be "SqlNode". - # Example: "DateFunctionCall()". + # Example: "DateTimeConstructorCall()". builtinFunctionCallMethods: [ ] diff --git a/core/src/main/codegen/templates/Parser.jj b/core/src/main/codegen/templates/Parser.jj index c738e2b151..385524c20d 100644 --- a/core/src/main/codegen/templates/Parser.jj +++ b/core/src/main/codegen/templates/Parser.jj @@ -4034,6 +4034,7 @@ SqlNode AtomicRowExpression() : } { ( + LOOKAHEAD(2) e = LiteralOrIntervalExpression() | e = DynamicParam() @@ -4630,6 +4631,28 @@ SqlLiteral DateTimeLiteral() : } } +/** Parses a Date/Time constructor function, for example "DATE(1969, 7, 21)" + * or "DATETIME(d, t)". Enabled in some libraries (e.g. BigQuery). */ +SqlNode DateTimeConstructorCall() : +{ + final SqlFunctionCategory funcType = SqlFunctionCategory.TIMEDATE; + final SqlIdentifier qualifiedName; + final Span s; + final SqlLiteral quantifier; + final List<? extends SqlNode> args; +} +{ + (<DATE> | <TIME> | <DATETIME> | <TIMESTAMP>) { + s = span(); + qualifiedName = new SqlIdentifier(unquotedIdentifier(), getPos()); + } + args = FunctionParameterList(ExprContext.ACCEPT_SUB_QUERY) { + quantifier = (SqlLiteral) args.get(0); + args.remove(0); + return createCall(qualifiedName, s.end(this), funcType, quantifier, args); + } +} + /** Parses a MULTISET constructor */ SqlNode MultisetConstructor() : { @@ -6097,6 +6120,8 @@ SqlNode BuiltinFunctionCall() : <RPAREN> { return SqlStdOperatorTable.TRIM.createCall(s.end(this), args); } + | + node = DateTimeConstructorCall() { return node; } | node = DateTruncFunctionCall() { return node; } | diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index e61c7731d6..66c4460e80 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -124,6 +124,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.CONCAT_FUNCTION; import static org.apache.calcite.sql.fun.SqlLibraryOperators.COSH; import static org.apache.calcite.sql.fun.SqlLibraryOperators.DATE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.DATEADD; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.DATETIME; import static org.apache.calcite.sql.fun.SqlLibraryOperators.DATE_FROM_UNIX_DATE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.DATE_TRUNC; import static org.apache.calcite.sql.fun.SqlLibraryOperators.DAYNAME; @@ -166,6 +167,8 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPACE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.STARTS_WITH; import static org.apache.calcite.sql.fun.SqlLibraryOperators.STRCMP; import static org.apache.calcite.sql.fun.SqlLibraryOperators.TANH; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.TIME; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.TIMESTAMP; import static org.apache.calcite.sql.fun.SqlLibraryOperators.TIMESTAMP_MICROS; import static org.apache.calcite.sql.fun.SqlLibraryOperators.TIMESTAMP_MILLIS; import static org.apache.calcite.sql.fun.SqlLibraryOperators.TIMESTAMP_SECONDS; @@ -550,6 +553,11 @@ public class RexImpTable { defineMethod(DATE_FROM_UNIX_DATE, "dateFromUnixDate", NullPolicy.STRICT); defineMethod(UNIX_DATE, "unixDate", NullPolicy.STRICT); + defineMethod(DATE, "date", NullPolicy.STRICT); + defineMethod(DATETIME, "datetime", NullPolicy.STRICT); + defineMethod(TIMESTAMP, "timestamp", NullPolicy.STRICT); + defineMethod(TIME, "time", NullPolicy.STRICT); + map.put(IS_NULL, new IsNullImplementor()); map.put(IS_NOT_NULL, new IsNotNullImplementor()); map.put(IS_TRUE, new IsTrueImplementor()); @@ -624,7 +632,6 @@ public class RexImpTable { map.put(COALESCE, new CoalesceImplementor()); map.put(CAST, new CastImplementor()); - map.put(DATE, new CastImplementor()); map.put(REINTERPRET, new ReinterpretImplementor()); diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java index 1f5c0f21b2..a44c6390f0 100644 --- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java @@ -59,10 +59,18 @@ import java.math.RoundingMode; import java.sql.SQLException; import java.sql.Timestamp; import java.text.DecimalFormat; +import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetDateTime; +import java.time.ZoneId; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; +import java.time.format.SignStyle; +import java.time.temporal.ChronoField; import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; @@ -153,6 +161,44 @@ public class SqlFunctions { private static final ByteString SINGLE_SPACE_BYTE_STRING = ByteString.of("20", 16); + // Date formatter for BigQuery's timestamp literals: + // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#timestamp_literals + private static final DateTimeFormatter BIG_QUERY_TIMESTAMP_LITERAL_FORMATTER = + new DateTimeFormatterBuilder() + // Unlike ISO 8601, BQ only supports years between 1 - 9999, + // but can support single-digit month and day parts. + .appendValue(ChronoField.YEAR, 4) + .appendLiteral('-') + .appendValue(ChronoField.MONTH_OF_YEAR, 1, 2, SignStyle.NOT_NEGATIVE) + .appendLiteral('-') + .appendValue(ChronoField.DAY_OF_MONTH, 1, 2, SignStyle.NOT_NEGATIVE) + // Everything after the date is optional. Optional sections can be nested. + .optionalStart() + // BQ accepts either a literal 'T' or a space to separate the date from the time, + // so make the 'T' optional but pad with 1 space if it's omitted. + .padNext(1, ' ') + .optionalStart() + .appendLiteral('T') + .optionalEnd() + // Unlike ISO 8601, BQ can support single-digit hour, minute, and second parts. + .appendValue(ChronoField.HOUR_OF_DAY, 1, 2, SignStyle.NOT_NEGATIVE) + .appendLiteral(':') + .appendValue(ChronoField.MINUTE_OF_HOUR, 1, 2, SignStyle.NOT_NEGATIVE) + .appendLiteral(':') + .appendValue(ChronoField.SECOND_OF_MINUTE, 1, 2, SignStyle.NOT_NEGATIVE) + // ISO 8601 supports up to nanosecond precision, but BQ only up to microsecond. + .optionalStart() + .appendFraction(ChronoField.MICRO_OF_SECOND, 0, 6, true) + .optionalEnd() + .optionalStart() + .parseLenient() + .appendOffsetId() + .toFormatter(Locale.ROOT); + + /** Whether the current Java version is 8 (1.8). */ + private static final boolean IS_JDK_8 = + System.getProperty("java.version").startsWith("1.8"); + private SqlFunctions() { } @@ -2650,6 +2696,206 @@ public class SqlFunctions { return v; } + /** SQL {@code DATE(year, month, day)} function. */ + public static int date(int year, int month, int day) { + // Calcite represents dates as Unix integers (days since epoch). + return (int) LocalDate.of(year, month, day).toEpochDay(); + } + + /** SQL {@code DATE(TIMESTAMP)} and + * {@code DATE(TIMESTAMP WITH LOCAL TIME ZONE)} functions. */ + public static int date(long timestampMillis) { + // Calcite represents dates as Unix integers (days since epoch). + // Unix time ignores leap seconds; every day has the exact same number of + // milliseconds. BigQuery TIMESTAMP and DATETIME values (Calcite TIMESTAMP + // WITH LOCAL TIME ZONE and TIMESTAMP, respectively) are represented + // internally as milliseconds since epoch (or epoch UTC). + return (int) (timestampMillis / DateTimeUtils.MILLIS_PER_DAY); + } + + /** SQL {@code DATE(TIMESTAMP WITH LOCAL TIME, timeZone)} function. */ + public static int date(long timestampMillis, String timeZone) { + // Calcite represents dates as Unix integers (days since epoch). + return (int) OffsetDateTime.ofInstant(Instant.ofEpochMilli(timestampMillis), + ZoneId.of(timeZone)) + .toLocalDate() + .toEpochDay(); + } + + /** SQL {@code DATETIME(<year>, <month>, <day>, <hour>, <minute>, <second>)} + * function. */ + public static long datetime(int year, int month, int day, int hour, + int minute, int second) { + // BigQuery's DATETIME function returns a Calcite TIMESTAMP, + // represented internally as milliseconds since epoch UTC. + return LocalDateTime.of(year, month, day, hour, minute, second) + .toEpochSecond(ZoneOffset.UTC) + * DateTimeUtils.MILLIS_PER_SECOND; + } + + /** SQL {@code DATETIME(DATE)} function; returns a Calcite TIMESTAMP. */ + public static long datetime(int daysSinceEpoch) { + // BigQuery's DATETIME function returns a Calcite TIMESTAMP, + // represented internally as milliseconds since epoch. + return daysSinceEpoch * DateTimeUtils.MILLIS_PER_DAY; + } + + /** SQL {@code DATETIME(DATE, TIME)} function; returns a Calcite TIMESTAMP. */ + public static long datetime(int daysSinceEpoch, int millisSinceMidnight) { + // BigQuery's DATETIME function returns a Calcite TIMESTAMP, + // represented internally as milliseconds since epoch UTC. + return daysSinceEpoch * DateTimeUtils.MILLIS_PER_DAY + millisSinceMidnight; + } + + /** SQL {@code DATETIME(TIMESTAMP WITH LOCAL TIME ZONE)} function; + * returns a Calcite TIMESTAMP. */ + public static long datetime(long millisSinceEpoch) { + // BigQuery TIMESTAMP and DATETIME values (Calcite TIMESTAMP WITH LOCAL TIME + // ZONE and TIMESTAMP, respectively) are represented internally as + // milliseconds since epoch (or epoch UTC). + return millisSinceEpoch; + } + + /** SQL {@code DATETIME(TIMESTAMP, timeZone)} function; + * returns a Calcite TIMESTAMP. */ + public static long datetime(long millisSinceEpoch, String timeZone) { + // BigQuery TIMESTAMP and DATETIME values (Calcite TIMESTAMP WITH LOCAL TIME + // ZONE and TIMESTAMP, respectively) are represented internally as + // milliseconds since epoch (or epoch UTC). + return OffsetDateTime.ofInstant(Instant.ofEpochMilli(millisSinceEpoch), + ZoneId.of(timeZone)) + .atZoneSimilarLocal(ZoneId.of("UTC")) + .toInstant() + .toEpochMilli(); + } + + /** SQL {@code TIMESTAMP(<string>)} function. */ + public static long timestamp(String expression) { + // Calcite represents TIMESTAMP WITH LOCAL TIME ZONE as Unix integers + // (milliseconds since epoch). + return parseBigQueryTimestampLiteral(expression).toInstant().toEpochMilli(); + } + + /** SQL {@code TIMESTAMP(<string>, <timeZone>)} function. */ + public static long timestamp(String expression, String timeZone) { + // Calcite represents TIMESTAMP WITH LOCAL TIME ZONE as Unix integers + // (milliseconds since epoch). + return parseBigQueryTimestampLiteral(expression) + .atZoneSimilarLocal(ZoneId.of(timeZone)) + .toInstant() + .toEpochMilli(); + } + + private static OffsetDateTime parseBigQueryTimestampLiteral(String expression) { + // First try to parse with an offset, otherwise parse as a local and assume + // UTC ("no offset"). + try { + return OffsetDateTime.parse(expression, + BIG_QUERY_TIMESTAMP_LITERAL_FORMATTER); + } catch (DateTimeParseException e) { + // ignore + } + if (IS_JDK_8 + && expression.matches(".*[+-][0-9][0-9]$")) { + // JDK 8 has a bug that prevents matching offsets like "+00" but can + // match "+00:00". + try { + expression += ":00"; + return OffsetDateTime.parse(expression, + BIG_QUERY_TIMESTAMP_LITERAL_FORMATTER); + } catch (DateTimeParseException e) { + // ignore + } + } + try { + return LocalDateTime + .parse(expression, BIG_QUERY_TIMESTAMP_LITERAL_FORMATTER) + .atOffset(ZoneOffset.UTC); + } catch (DateTimeParseException e2) { + throw new IllegalArgumentException( + String.format(Locale.ROOT, + "Could not parse BigQuery timestamp literal: %s", expression), + e2); + } + } + + /** SQL {@code TIMESTAMP(<date>)} function. */ + public static long timestamp(int days) { + // Calcite represents TIMESTAMP WITH LOCAL TIME ZONE as Unix integers + // (milliseconds since epoch). Unix time ignores leap seconds; every day + // has the same number of milliseconds. + return ((long) days) * DateTimeUtils.MILLIS_PER_DAY; + } + + /** SQL {@code TIMESTAMP(<date>, <timeZone>)} function. */ + public static long timestamp(int days, String timeZone) { + // Calcite represents TIMESTAMP WITH LOCAL TIME ZONE as Unix integers + // (milliseconds since epoch). + final LocalDateTime localDateTime = + LocalDateTime.of(LocalDate.ofEpochDay(days), LocalTime.MIDNIGHT); + final ZoneOffset zoneOffset = + ZoneId.of(timeZone).getRules().getOffset(localDateTime); + return OffsetDateTime.of(localDateTime, zoneOffset) + .toInstant() + .toEpochMilli(); + } + + /** SQL {@code TIMESTAMP(<timestamp>)} function; returns a TIMESTAMP WITH + * LOCAL TIME ZONE. */ + public static long timestamp(long millisSinceEpoch) { + // BigQuery TIMESTAMP and DATETIME values (Calcite TIMESTAMP WITH LOCAL + // TIME ZONE and TIMESTAMP, respectively) are represented internally as + // milliseconds since epoch UTC and epoch. + return millisSinceEpoch; + } + + /** SQL {@code TIMESTAMP(<timestamp>, <timeZone>)} function; returns a + * TIMESTAMP WITH LOCAL TIME ZONE. */ + public static long timestamp(long millisSinceEpoch, String timeZone) { + // BigQuery TIMESTAMP and DATETIME values (Calcite TIMESTAMP WITH LOCAL + // TIME ZONE and TIMESTAMP, respectively) are represented internally as + // milliseconds since epoch UTC and epoch. + final Instant instant = Instant.ofEpochMilli(millisSinceEpoch); + final ZoneId utcZone = ZoneId.of("UTC"); + return OffsetDateTime.ofInstant(instant, utcZone) + .atZoneSimilarLocal(ZoneId.of(timeZone)) + .toInstant() + .toEpochMilli(); + } + + /** SQL {@code TIME(<hour>, <minute>, <second>)} function. */ + public static int time(int hour, int minute, int second) { + // Calcite represents time as Unix integers (milliseconds since midnight). + return (int) (LocalTime.of(hour, minute, second).toSecondOfDay() + * DateTimeUtils.MILLIS_PER_SECOND); + } + + /** SQL {@code TIME(<timestamp>)} and {@code TIME(<timestampLtz>)} + * functions. */ + public static int time(long timestampMillis) { + // Calcite represents time as Unix integers (milliseconds since midnight). + // Unix time ignores leap seconds; every day has the same number of + // milliseconds. + // + // BigQuery TIMESTAMP and DATETIME values (Calcite TIMESTAMP WITH LOCAL + // TIME ZONE and TIMESTAMP, respectively) are represented internally as + // milliseconds since epoch UTC and epoch. + return (int) (timestampMillis % DateTimeUtils.MILLIS_PER_DAY); + } + + /** SQL {@code TIME(<timestampLtz>, <timeZone>)} function. */ + public static int time(long timestampMillis, String timeZone) { + // Calcite represents time as Unix integers (milliseconds since midnight). + // Unix time ignores leap seconds; every day has the same number of + // milliseconds. + final Instant instant = Instant.ofEpochMilli(timestampMillis); + final ZoneId zoneId = ZoneId.of(timeZone); + return (int) (OffsetDateTime.ofInstant(instant, zoneId) + .toLocalTime() + .toNanoOfDay() + / (1000L * 1000L)); // milli > micro > nano + } + public static @PolyNull Long toTimestampWithLocalTimeZone(@PolyNull String v) { if (v == null) { return castNonNull(null); diff --git a/core/src/main/java/org/apache/calcite/sql/SqlLiteral.java b/core/src/main/java/org/apache/calcite/sql/SqlLiteral.java index e60c000196..b4e08ed0b3 100644 --- a/core/src/main/java/org/apache/calcite/sql/SqlLiteral.java +++ b/core/src/main/java/org/apache/calcite/sql/SqlLiteral.java @@ -881,7 +881,7 @@ public class SqlLiteral extends SqlNode { return createTimestamp(SqlTypeName.TIMESTAMP, ts, precision, pos); } - /** Creates a TIMESTAMP or TIMESTAMP WITH TIME ZONE literal. */ + /** Creates a TIMESTAMP or TIMESTAMP WITH LOCAL TIME ZONE literal. */ public static SqlTimestampLiteral createTimestamp( SqlTypeName typeName, TimestampString ts, diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlDatetimePlusOperator.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlDatetimePlusOperator.java index dd83c26551..0be4b0e99a 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlDatetimePlusOperator.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlDatetimePlusOperator.java @@ -16,9 +16,6 @@ */ package org.apache.calcite.sql.fun; -import org.apache.calcite.avatica.util.TimeUnit; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperatorBinding; @@ -27,32 +24,31 @@ import org.apache.calcite.sql.SqlWriter; import org.apache.calcite.sql.type.InferTypes; import org.apache.calcite.sql.type.IntervalSqlType; import org.apache.calcite.sql.type.OperandTypes; -import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.apache.calcite.sql.validate.SqlMonotonicity; /** * Operator that adds an INTERVAL to a DATETIME. */ public class SqlDatetimePlusOperator extends SqlSpecialOperator { + private static final SqlReturnTypeInference RETURN_TYPE_INFERENCE = + opBinding -> + SqlTimestampAddFunction.deduceType(opBinding.getTypeFactory(), + ((IntervalSqlType) opBinding.getOperandType(1)) + .getIntervalQualifier().getStartUnit(), + opBinding.getOperandType(0)); + //~ Constructors ----------------------------------------------------------- SqlDatetimePlusOperator() { - super("+", SqlKind.PLUS, 40, true, ReturnTypes.ARG2_NULLABLE, + super("+", SqlKind.PLUS, 40, true, + RETURN_TYPE_INFERENCE.andThen(SqlTypeTransforms.TO_NULLABLE), InferTypes.FIRST_KNOWN, OperandTypes.MINUS_DATE_OPERATOR); } //~ Methods ---------------------------------------------------------------- - @Override public RelDataType inferReturnType(SqlOperatorBinding opBinding) { - final RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); - final RelDataType leftType = opBinding.getOperandType(0); - final IntervalSqlType unitType = - (IntervalSqlType) opBinding.getOperandType(1); - final TimeUnit timeUnit = unitType.getIntervalQualifier().getStartUnit(); - return SqlTimestampAddFunction.deduceType(typeFactory, timeUnit, - unitType, leftType); - } - @Override public void unparse( SqlWriter writer, SqlCall call, diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index 3bbe6fdd1e..9efadfca79 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -482,11 +482,119 @@ public abstract class SqlLibraryOperators { public static final SqlAggFunction MIN_BY = SqlStdOperatorTable.ARG_MIN.withName("MIN_BY"); - /** The "DATE(string)" function, equivalent to "CAST(string AS DATE). */ + /** The "DATE" function. It has the following overloads: + * + * <ul> + * <li>{@code DATE(string)} is syntactic sugar for + * {@code CAST(string AS DATE)} + * <li>{@code DATE(year, month, day)} + * <li>{@code DATE(timestampLtz [, timeZone])} + * <li>{@code DATE(timestamp)} + * </ul> + */ @LibraryOperator(libraries = {BIG_QUERY}) public static final SqlFunction DATE = SqlBasicFunction.create("DATE", ReturnTypes.DATE_NULLABLE, - OperandTypes.STRING, SqlFunctionCategory.TIMEDATE); + OperandTypes.or( + // DATE(string) + OperandTypes.STRING, + // DATE(year, month, day) + OperandTypes.family(SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER, + SqlTypeFamily.INTEGER), + // DATE(timestamp) + OperandTypes.TIMESTAMP_NTZ, + // DATE(timestampLtz) + OperandTypes.TIMESTAMP_LTZ, + // DATE(timestampLtz, timeZone) + OperandTypes.sequence( + "DATE(TIMESTAMP WITH LOCAL TIME ZONE, VARCHAR)", + OperandTypes.TIMESTAMP_LTZ, OperandTypes.CHARACTER)), + SqlFunctionCategory.TIMEDATE); + + /** The "DATETIME" function returns a Calcite + * {@code TIMESTAMP} (which BigQuery calls a {@code DATETIME}). + * It has the following overloads: + * + * <ul> + * <li>{@code DATETIME(year, month, day, hour, minute, second)} + * <li>{@code DATETIME(date[, time])} + * <li>{@code DATETIME(timestampLtz[, timeZone])} + * </ul> + */ + @LibraryOperator(libraries = {BIG_QUERY}) + public static final SqlFunction DATETIME = + SqlBasicFunction.create("DATETIME", ReturnTypes.TIMESTAMP_NULLABLE, + OperandTypes.or( + // DATETIME(year, month, day, hour, minute, second) + OperandTypes.family(SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER, + SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER, + SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER), + // DATETIME(date) + OperandTypes.DATE, + // DATETIME(date, time) + OperandTypes.DATE_TIME, + // DATETIME(timestampLtz) + OperandTypes.TIMESTAMP_LTZ, + // DATETIME(timestampLtz, timeZone) + OperandTypes.sequence( + "DATETIME(TIMESTAMP WITH LOCAL TIME ZONE, VARCHAR)", + OperandTypes.TIMESTAMP_LTZ, OperandTypes.CHARACTER)), + SqlFunctionCategory.TIMEDATE); + + /** The "TIME" function. It has the following overloads: + * + * <ul> + * <li>{@code TIME(hour, minute, second)} + * <li>{@code TIME(timestampLtz [, timeZone])} + * <li>{@code TIME(timestamp)} + * </ul> + */ + @LibraryOperator(libraries = {BIG_QUERY}) + public static final SqlFunction TIME = + SqlBasicFunction.create("TIME", ReturnTypes.TIME_NULLABLE, + OperandTypes.or( + // TIME(hour, minute, second) + OperandTypes.family(SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER, + SqlTypeFamily.INTEGER), + // TIME(timestamp) + OperandTypes.TIMESTAMP_NTZ, + // TIME(timestampLtz) + OperandTypes.TIMESTAMP_LTZ, + // TIME(timestampLtz, timeZone) + OperandTypes.sequence( + "TIME(TIMESTAMP WITH LOCAL TIME ZONE, VARCHAR)", + OperandTypes.TIMESTAMP_LTZ, OperandTypes.CHARACTER)), + SqlFunctionCategory.TIMEDATE); + + /** The "TIMESTAMP" function returns a Calcite + * {@code TIMESTAMP WITH LOCAL TIME ZONE} + * (which BigQuery calls a {@code TIMESTAMP}). It has the following overloads: + * + * <ul> + * <li>{@code TIMESTAMP(string[, timeZone])} + * <li>{@code TIMESTAMP(date[, timeZone])} + * <li>{@code TIMESTAMP(timestamp[, timeZone])} + * </ul> + */ + @LibraryOperator(libraries = {BIG_QUERY}) + public static final SqlFunction TIMESTAMP = + SqlBasicFunction.create("TIMESTAMP", + ReturnTypes.TIMESTAMP_LTZ.andThen(SqlTypeTransforms.TO_NULLABLE), + OperandTypes.or( + // TIMESTAMP(string) + OperandTypes.CHARACTER, + // TIMESTAMP(string, timeZone) + OperandTypes.CHARACTER_CHARACTER, + // TIMESTAMP(date) + OperandTypes.DATE, + // TIMESTAMP(date, timeZone) + OperandTypes.DATE_CHARACTER, + // TIMESTAMP(timestamp) + OperandTypes.TIMESTAMP_NTZ, + // TIMESTAMP(timestamp, timeZone) + OperandTypes.sequence("TIMESTAMP(TIMESTAMP, VARCHAR)", + OperandTypes.TIMESTAMP_NTZ, OperandTypes.CHARACTER)), + SqlFunctionCategory.TIMEDATE); /** The "CURRENT_DATETIME([timezone])" function. */ @LibraryOperator(libraries = {BIG_QUERY}) diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlTimestampAddFunction.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlTimestampAddFunction.java index 7b96f6e05c..7cef98c097 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlTimestampAddFunction.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlTimestampAddFunction.java @@ -28,6 +28,7 @@ import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql.validate.SqlValidatorScope; @@ -70,43 +71,50 @@ public class SqlTimestampAddFunction extends SqlFunction { opBinding -> deduceType(opBinding.getTypeFactory(), opBinding.getOperandLiteralValue(0, TimeUnit.class), - opBinding.getOperandType(1), opBinding.getOperandType(2)); + opBinding.getOperandType(2)); + @Deprecated // to be removed before 2.0 public static RelDataType deduceType(RelDataTypeFactory typeFactory, @Nullable TimeUnit timeUnit, RelDataType operandType1, RelDataType operandType2) { - final RelDataType type; + final RelDataType type = deduceType(typeFactory, timeUnit, operandType2); + return typeFactory.createTypeWithNullability(type, + operandType1.isNullable() || operandType2.isNullable()); + } + + static RelDataType deduceType(RelDataTypeFactory typeFactory, + @Nullable TimeUnit timeUnit, RelDataType datetimeType) { TimeUnit timeUnit2 = first(timeUnit, TimeUnit.EPOCH); switch (timeUnit2) { + case MILLISECOND: + return typeFactory.createSqlType(SqlTypeName.TIMESTAMP, + MILLISECOND_PRECISION); + + case MICROSECOND: + return typeFactory.createSqlType(SqlTypeName.TIMESTAMP, + MICROSECOND_PRECISION); + case HOUR: case MINUTE: case SECOND: - case MILLISECOND: - case MICROSECOND: - switch (timeUnit2) { - case MILLISECOND: - type = typeFactory.createSqlType(SqlTypeName.TIMESTAMP, - MILLISECOND_PRECISION); - break; - case MICROSECOND: - type = typeFactory.createSqlType(SqlTypeName.TIMESTAMP, - MICROSECOND_PRECISION); + SqlTypeName typeName = datetimeType.getSqlTypeName(); + switch (typeName) { + case TIME: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: break; default: - if (operandType2.getSqlTypeName() == SqlTypeName.TIME) { - type = typeFactory.createSqlType(SqlTypeName.TIME); - } else { - type = typeFactory.createSqlType(SqlTypeName.TIMESTAMP); - } + // If it is not a TIMESTAMP_WITH_LOCAL_TIME_ZONE, operations involving + // HOUR, MINUTE, SECOND with DATE or TIMESTAMP types will result in + // TIMESTAMP type. + typeName = SqlTypeName.TIMESTAMP; + break; } - break; + return typeFactory.createSqlType(typeName); + default: case EPOCH: - type = operandType2; + return datetimeType; } - return typeFactory.createTypeWithNullability(type, - operandType1.isNullable() - || operandType2.isNullable()); } @Override public void validateCall(SqlCall call, SqlValidator validator, @@ -128,7 +136,8 @@ public class SqlTimestampAddFunction extends SqlFunction { /** Creates a SqlTimestampAddFunction. */ SqlTimestampAddFunction(String name) { - super(name, SqlKind.TIMESTAMP_ADD, RETURN_TYPE_INFERENCE, null, + super(name, SqlKind.TIMESTAMP_ADD, + RETURN_TYPE_INFERENCE.andThen(SqlTypeTransforms.TO_NULLABLE), null, OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER, SqlTypeFamily.DATETIME), SqlFunctionCategory.TIMEDATE); diff --git a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java index 8f42fbd9eb..cd927392aa 100644 --- a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java +++ b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java @@ -50,6 +50,8 @@ import static com.google.common.base.Preconditions.checkArgument; import static org.apache.calcite.util.Static.RESOURCE; +import static java.util.Objects.requireNonNull; + /** * Strategies for checking operand types. * @@ -130,6 +132,14 @@ public abstract class OperandTypes { return family(families, i -> false); } + /** + * Creates a single-operand checker that passes if the operand's type has a + * particular {@link SqlTypeName}. + */ + public static SqlSingleOperandTypeChecker typeName(SqlTypeName typeName) { + return new TypeNameChecker(typeName); + } + /** * Creates a checker that passes if the operand is an interval appropriate for * a given date/time type. For example, the time frame HOUR is appropriate for @@ -439,9 +449,22 @@ public abstract class OperandTypes { public static final SqlSingleOperandTypeChecker TIMESTAMP = family(SqlTypeFamily.TIMESTAMP); + /** Type-checker that matches "TIMESTAMP WITH LOCAL TIME ZONE" but not other + * members of the "TIMESTAMP" family (e.g. "TIMESTAMP"). */ + public static final SqlSingleOperandTypeChecker TIMESTAMP_LTZ = + new TypeNameChecker(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE); + + /** Type-checker that matches "TIMESTAMP" but not other members of the + * "TIMESTAMP" family (e.g. "TIMESTAMP WITH LOCAL TIME ZONE"). */ + public static final SqlSingleOperandTypeChecker TIMESTAMP_NTZ = + new TypeNameChecker(SqlTypeName.TIMESTAMP); + public static final SqlSingleOperandTypeChecker INTERVAL = family(SqlTypeFamily.DATETIME_INTERVAL); + public static final SqlSingleOperandTypeChecker CHARACTER_CHARACTER = + family(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER); + public static final SqlSingleOperandTypeChecker CHARACTER_CHARACTER_DATETIME = family(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER, SqlTypeFamily.DATETIME); @@ -803,6 +826,12 @@ public abstract class OperandTypes { public static final SqlSingleOperandTypeChecker DATE_INTERVAL = family(SqlTypeFamily.DATE, SqlTypeFamily.DATETIME_INTERVAL); + public static final SqlSingleOperandTypeChecker DATE_CHARACTER = + family(SqlTypeFamily.DATE, SqlTypeFamily.CHARACTER); + + public static final SqlSingleOperandTypeChecker DATE_TIME = + family(SqlTypeFamily.DATE, SqlTypeFamily.TIME); + public static final SqlSingleOperandTypeChecker DATETIME_INTERVAL = family(SqlTypeFamily.DATETIME, SqlTypeFamily.DATETIME_INTERVAL); @@ -1022,4 +1051,37 @@ public abstract class OperandTypes { "PERIOD (DATETIME, DATETIME)")); } } + + /** Checker that passes if the operand's type has a particular + * {@link SqlTypeName}. */ + private static class TypeNameChecker implements SqlSingleOperandTypeChecker, + ImplicitCastOperandTypeChecker { + final SqlTypeName typeName; + + TypeNameChecker(SqlTypeName typeName) { + this.typeName = requireNonNull(typeName, "typeName"); + } + + @Override public boolean checkSingleOperandType(SqlCallBinding callBinding, + SqlNode operand, int iFormalOperand, boolean throwOnFailure) { + final RelDataType operandType = + callBinding.getValidator().getValidatedNodeType(operand); + return operandType.getSqlTypeName() == typeName; + } + + @Override public boolean checkOperandTypesWithoutTypeCoercion( + SqlCallBinding callBinding, boolean throwOnFailure) { + // FIXME we assume that there is exactly one operand + return checkSingleOperandType(callBinding, callBinding.operand(0), 0, + throwOnFailure); + } + + @Override public SqlTypeFamily getOperandSqlTypeFamily(int iFormalOperand) { + return requireNonNull(typeName.getFamily(), "family"); + } + + @Override public String getAllowedSignatures(SqlOperator op, String opName) { + return opName + "(" + typeName.getSpaceName() + ")"; + } + } } diff --git a/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java b/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java index d1ae67f1f4..dff8359785 100644 --- a/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java +++ b/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java @@ -328,6 +328,13 @@ public abstract class ReturnTypes { public static final SqlReturnTypeInference TIMESTAMP_NULLABLE = TIMESTAMP.andThen(SqlTypeTransforms.TO_NULLABLE); + /** + * Type-inference strategy whereby the result type of a call is TIMESTAMP + * WITH LOCAL TIME ZONE. + */ + public static final SqlReturnTypeInference TIMESTAMP_LTZ = + explicit(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE); + /** * Type-inference strategy whereby the result type of a call is Double. */ diff --git a/core/src/main/java/org/apache/calcite/sql/type/SqlTypeName.java b/core/src/main/java/org/apache/calcite/sql/type/SqlTypeName.java index c2939c65d1..c337580fec 100644 --- a/core/src/main/java/org/apache/calcite/sql/type/SqlTypeName.java +++ b/core/src/main/java/org/apache/calcite/sql/type/SqlTypeName.java @@ -69,8 +69,8 @@ public enum SqlTypeName { SqlTypeFamily.TIME), TIMESTAMP(PrecScale.NO_NO | PrecScale.YES_NO, false, Types.TIMESTAMP, SqlTypeFamily.TIMESTAMP), - TIMESTAMP_WITH_LOCAL_TIME_ZONE(PrecScale.NO_NO | PrecScale.YES_NO, false, Types.OTHER, - SqlTypeFamily.TIMESTAMP), + TIMESTAMP_WITH_LOCAL_TIME_ZONE(PrecScale.NO_NO | PrecScale.YES_NO, false, + Types.TIMESTAMP, SqlTypeFamily.TIMESTAMP), INTERVAL_YEAR(PrecScale.NO_NO, false, Types.OTHER, SqlTypeFamily.INTERVAL_YEAR_MONTH), INTERVAL_YEAR_MONTH(PrecScale.NO_NO, false, Types.OTHER, diff --git a/core/src/main/java/org/apache/calcite/sql2rel/SqlNodeToRexConverterImpl.java b/core/src/main/java/org/apache/calcite/sql2rel/SqlNodeToRexConverterImpl.java index 9a8bc7e04b..eb8cee0cab 100644 --- a/core/src/main/java/org/apache/calcite/sql2rel/SqlNodeToRexConverterImpl.java +++ b/core/src/main/java/org/apache/calcite/sql2rel/SqlNodeToRexConverterImpl.java @@ -112,6 +112,10 @@ public class SqlNodeToRexConverterImpl implements SqlNodeToRexConverter { return rexBuilder.makeTimestampLiteral( literal.getValueAs(TimestampString.class), ((SqlTimestampLiteral) literal).getPrec()); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return rexBuilder.makeTimestampWithLocalTimeZoneLiteral( + literal.getValueAs(TimestampString.class), + ((SqlTimestampLiteral) literal).getPrec()); case TIME: return rexBuilder.makeTimeLiteral( literal.getValueAs(TimeString.class), diff --git a/core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java b/core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java index e24787b1b5..ddafadccce 100644 --- a/core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java +++ b/core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java @@ -154,6 +154,19 @@ public class StandardConvertletTable extends ReflectiveConvertletTable { } }); + // DATE(string) is equivalent to CAST(string AS DATE), + // but other DATE variants are treated as regular functions. + registerOp(SqlLibraryOperators.DATE, + (cx, call) -> { + final RexCall e = + (RexCall) StandardConvertletTable.this.convertCall(cx, call); + if (e.getOperands().size() == 1 + && SqlTypeUtil.isString(e.getOperands().get(0).getType())) { + return cx.getRexBuilder().makeCast(e.type, e.getOperands().get(0)); + } + return e; + }); + registerOp(SqlLibraryOperators.LTRIM, new TrimConvertlet(SqlTrimFunction.Flag.LEADING)); registerOp(SqlLibraryOperators.RTRIM, @@ -345,8 +358,7 @@ public class StandardConvertletTable extends ReflectiveConvertletTable { rexBuilder.makeCast( cx.getTypeFactory() .createTypeWithNullability(type, operand1.getType().isNullable()), - operand1) - )); + operand1))); } /** Converts a call to the DECODE function. */ @@ -1852,6 +1864,9 @@ public class StandardConvertletTable extends ReflectiveConvertletTable { @Override public RexNode convertCall(SqlRexContext cx, SqlCall call) { // TIMESTAMPADD(unit, count, timestamp) // => timestamp + count * INTERVAL '1' UNIT + // TIMESTAMP_ADD(timestamp, interval) + // => timestamp + interval + // "timestamp" may be of type TIMESTAMP or TIMESTAMP WITH LOCAL TIME ZONE. final RexBuilder rexBuilder = cx.getRexBuilder(); SqlIntervalQualifier qualifier; final RexNode op1; diff --git a/innodb/src/main/java/org/apache/calcite/adapter/innodb/InnodbEnumerator.java b/innodb/src/main/java/org/apache/calcite/adapter/innodb/InnodbEnumerator.java index ae5561cfad..3a7bb92ab5 100644 --- a/innodb/src/main/java/org/apache/calcite/adapter/innodb/InnodbEnumerator.java +++ b/innodb/src/main/java/org/apache/calcite/adapter/innodb/InnodbEnumerator.java @@ -97,6 +97,7 @@ class InnodbEnumerator implements Enumerator<Object> { case VARBINARY: return new ByteString((byte[]) obj); case TIMESTAMP: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: Timestamp timestamp = Utils.convertDateTime((String) obj, relDataType.getPrecision()); return shift(timestamp).getTime(); diff --git a/site/_docs/reference.md b/site/_docs/reference.md index 1fb0dfcfa6..4e8af4b39a 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2608,6 +2608,17 @@ The 'C' (compatibility) column contains value: One operator name may correspond to multiple SQL dialects, but with different semantics. +BigQuery's type system uses confusingly different names for types and functions: +* BigQuery's `DATETIME` type represents a local date time, and corresponds to + Calcite's `TIMESTAMP` type; +* BigQuery's `TIMESTAMP` type represents an instant, and corresponds to + Calcite's `TIMESTAMP WITH LOCAL TIME ZONE` type; +* The *timestampLtz* parameter, for instance in `DATE(timestampLtz)`, has + Calcite type `TIMESTAMP WITH LOCAL TIME ZONE`; +* The `TIMESTAMP(string)` function, designed to be compatible the BigQuery + function, return a Calcite `TIMESTAMP WITH LOCAL TIME ZONE`; +* Similarly, `DATETIME(string)` returns a Calcite `TIMESTAMP`. + | C | Operator syntax | Description |:- |:-----------------------------------------------|:----------- | p | expr :: type | Casts *expr* to *type* @@ -2624,10 +2635,18 @@ semantics. | p | CONVERT_TIMEZONE(tz1, tz2, datetime) | Converts the timezone of *datetime* from *tz1* to *tz2* | b | CURRENT_DATETIME([ timeZone ]) | Returns the current time as a TIMESTAMP from *timezone* | m | DAYNAME(datetime) | Returns the name, in the connection's locale, of the weekday in *datetime*; for example, it returns '星期日' for both DATE '2020-02-10' and TIMESTAMP '2020-02-10 10:10:10' +| b | DATE(timestamp) | Extracts the DATE from a *timestamp* +| b | DATE(timestampLtz) | Extracts the DATE from *timestampLtz* (an instant; BigQuery's TIMESTAMP type), assuming UTC +| b | DATE(timestampLtz, timeZone) | Extracts the DATE from *timestampLtz* (an instant; BigQuery's TIMESTAMP type) in *timeZone* | b | DATE(string) | Equivalent to `CAST(string AS DATE)` +| b | DATE(year, month, day) | Returns a DATE value for *year*, *month*, and *day* (all of type INTEGER) | p q | DATEADD(timeUnit, integer, datetime) | Equivalent to `TIMESTAMPADD(timeUnit, integer, datetime)` | p q | DATEDIFF(timeUnit, datetime, datetime2) | Equivalent to `TIMESTAMPDIFF(timeUnit, datetime, datetime2)` | q | DATEPART(timeUnit, datetime) | Equivalent to `EXTRACT(timeUnit FROM datetime)` +| b | DATETIME(date, time) | Converts *date* and *time* to a TIMESTAMP +| b | DATETIME(date) | Converts *date* to a TIMESTAMP value (at midnight) +| b | DATETIME(date, timeZone) | Converts *date* to a TIMESTAMP value (at midnight), in *timeZone* +| b | DATETIME(year, month, day, hour, minute, second) | Creates a TIMESTAMP for *year*, *month*, *day*, *hour*, *minute*, *second* (all of type INTEGER) | b | DATE_FROM_UNIX_DATE(integer) | Returns the DATE that is *integer* days after 1970-01-01 | p | DATE_PART(timeUnit, datetime) | Equivalent to `EXTRACT(timeUnit FROM datetime)` | b | DATE_SUB(date, interval) | Returns the DATE value that occurs *interval* before *date* @@ -2680,6 +2699,16 @@ semantics. | m | STRCMP(string, string) | Returns 0 if both of the strings are same and returns -1 when the first argument is smaller than the second and 1 when the second one is smaller than the first one | b m o p | SUBSTR(string, position [, substringLength ]) | Returns a portion of *string*, beginning at character *position*, *substringLength* characters long. SUBSTR calculates lengths using characters as defined by the input character set | b o | TANH(numeric) | Returns the hyperbolic tangent of *numeric* +| b | TIME(hour, minute, second) | Returns a TIME value *hour*, *minute*, *second* (all of type INTEGER) +| b | TIME(timestamp) | Extracts the TIME from *timestamp* (a local time; BigQuery's DATETIME type) +| b | TIME(instant) | Extracts the TIME from *timestampLtz* (an instant; BigQuery's TIMESTAMP type), assuming UTC +| b | TIME(instant, timeZone) | Extracts the time from *timestampLtz* (an instant; BigQuery's TIMESTAMP type), in *timeZone* +| b | TIMESTAMP(string) | Equivalent to `CAST(string AS TIMESTAMP WITH LOCAL TIME ZONE)` +| b | TIMESTAMP(string, timeZone) | Equivalent to `CAST(string AS TIMESTAMP WITH LOCAL TIME ZONE)`, converted to *timeZone* +| b | TIMESTAMP(date) | Converts *date* to a TIMESTAMP WITH LOCAL TIME ZONE value (at midnight) +| b | TIMESTAMP(date, timeZone) | Converts *date* to a TIMESTAMP WITH LOCAL TIME ZONE value (at midnight), in *timeZone* +| b | TIMESTAMP(timestamp) | Converts *timestamp* to a TIMESTAMP WITH LOCAL TIME ZONE, assuming a UTC +| b | TIMESTAMP(timestamp, timeZone) | Converts *timestamp* to a TIMESTAMP WITH LOCAL TIME ZONE, in *timeZone* | b | TIMESTAMP_ADD(timestamp, interval) | Returns the TIMESTAMP value that occurs *interval* after *timestamp* | b | TIMESTAMP_DIFF(timestamp, timestamp2, timeUnit) | Returns the whole number of *timeUnit* between *timestamp* and *timestamp2*. Equivalent to `TIMESTAMPDIFF(timeUnit, timestamp2, timestamp)` and `(timestamp - timestamp2) timeUnit` | b | TIMESTAMP_MICROS(integer) | Returns the TIMESTAMP that is *integer* microseconds after 1970-01-01 00:00:00 diff --git a/testkit/src/main/java/org/apache/calcite/test/ConnectionFactories.java b/testkit/src/main/java/org/apache/calcite/test/ConnectionFactories.java index cb02df29eb..2a09b98126 100644 --- a/testkit/src/main/java/org/apache/calcite/test/ConnectionFactories.java +++ b/testkit/src/main/java/org/apache/calcite/test/ConnectionFactories.java @@ -18,6 +18,7 @@ package org.apache.calcite.test; import org.apache.calcite.avatica.ConnectionProperty; import org.apache.calcite.jdbc.CalciteConnection; +import org.apache.calcite.rel.type.RelProtoDataType; import org.apache.calcite.runtime.FlatLists; import org.apache.calcite.schema.Schema; import org.apache.calcite.schema.SchemaPlus; @@ -83,6 +84,12 @@ public abstract class ConnectionFactories { return new DefaultSchemaPostProcessor(schema); } + /** Returns a post-processor that adds a type. */ + public static CalciteAssert.ConnectionPostProcessor addType(String name, + RelProtoDataType protoDataType) { + return new AddTypePostProcessor(name, protoDataType); + } + /** Connection factory that uses a given map of (name, value) pairs and * optionally an initial schema. */ private static class MapConnectionFactory implements ConnectionFactory { @@ -162,6 +169,25 @@ public abstract class ConnectionFactories { } } + /** Post-processor that adds a type. */ + private static class AddTypePostProcessor + implements CalciteAssert.ConnectionPostProcessor { + private final String name; + private final RelProtoDataType protoDataType; + + AddTypePostProcessor(String name, RelProtoDataType protoDataType) { + this.name = requireNonNull(name, "name"); + this.protoDataType = requireNonNull(protoDataType, "protoDataType"); + } + + @Override public Connection apply(Connection connection) throws SQLException { + CalciteConnection con = connection.unwrap(CalciteConnection.class); + SchemaPlus rootSchema = con.getRootSchema(); + rootSchema.add(name, protoDataType); + return connection; + } + } + /** Post-processor that sets a default schema name. */ private static class DefaultSchemaPostProcessor implements CalciteAssert.ConnectionPostProcessor { diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index 995406dce3..9d6a171d38 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -5100,10 +5100,10 @@ public class SqlOperatorTest { f.checkNull("timestamp_micros(cast(null as bigint))"); f.checkScalar("date_from_unix_date(0)", "1970-01-01", "DATE NOT NULL"); - // Have to quote the "DATE" function because we're not using the Babel - // parser. In the regular parser, DATE is a reserved keyword. - f.checkNull("\"DATE\"(null)"); - f.checkScalar("\"DATE\"('1985-12-06')", "1985-12-06", "DATE NOT NULL"); + // DATE is a reserved keyword, but the parser has special treatment to + // allow it as a function. + f.checkNull("DATE(null)"); + f.checkScalar("DATE('1985-12-06')", "1985-12-06", "DATE NOT NULL"); f.checkType("CURRENT_DATETIME()", "TIMESTAMP(0) NOT NULL"); f.checkType("CURRENT_DATETIME('America/Los_Angeles')", "TIMESTAMP(0) NOT NULL"); f.checkType("CURRENT_DATETIME(CAST(NULL AS VARCHAR(20)))", "TIMESTAMP(0)");
