Repository: incubator-impala Updated Branches: refs/heads/master 227a180ab -> dad489669
IMPALA-5137: [DOCS] Document TIMESTAMP for Kudu tables Change-Id: Ib889198eb2c918c969c7613dd1ddf65a801f7926 Reviewed-on: http://gerrit.cloudera.org:8080/7035 Reviewed-by: Matthew Jacobs <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/621af4b7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/621af4b7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/621af4b7 Branch: refs/heads/master Commit: 621af4b7cf932c26118ca4a3a9dc3dcc43f11adb Parents: 227a180 Author: John Russell <[email protected]> Authored: Wed May 31 16:15:02 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Tue Jun 6 20:32:44 2017 +0000 ---------------------------------------------------------------------- docs/shared/impala_common.xml | 45 ++++++++++++- docs/topics/impala_authorization.xml | 3 +- docs/topics/impala_datetime_functions.xml | 4 ++ docs/topics/impala_kudu.xml | 47 +++++++++++--- docs/topics/impala_timestamp.xml | 89 ++++++++++++++++++++------ 5 files changed, 154 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/621af4b7/docs/shared/impala_common.xml ---------------------------------------------------------------------- diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml index 8bd3cbb..ad86803 100644 --- a/docs/shared/impala_common.xml +++ b/docs/shared/impala_common.xml @@ -3418,7 +3418,7 @@ sudo pip-python install ssl</codeblock> </p> <p id="kudu_unsupported_data_type" rev="kudu"> - Currently, the data types <codeph>DECIMAL</codeph>, <codeph>TIMESTAMP</codeph>, <codeph>CHAR</codeph>, <codeph>VARCHAR</codeph>, + Currently, the data types <codeph>DECIMAL</codeph>, <codeph>CHAR</codeph>, <codeph>VARCHAR</codeph>, <codeph>ARRAY</codeph>, <codeph>MAP</codeph>, and <codeph>STRUCT</codeph> cannot be used with Kudu tables. </p> @@ -3485,6 +3485,49 @@ CREATE EXTERNAL TABLE impala_name STORED AS KUDU and subject to change. </p> + <p rev="2.9.0 IMPALA-5137" id="kudu_timestamp_nanoseconds_caveat"> + The nanosecond portion of an Impala <codeph>TIMESTAMP</codeph> value + is rounded to the nearest microsecond when that value is stored in a + Kudu table. + </p> + + <p rev="2.9.0 IMPALA-5137" id="kudu_timestamp_details"> + In <keyword keyref="impala29_full"/> and higher, you can include <codeph>TIMESTAMP</codeph> + columns in Kudu tables, instead of representing the date and time as a <codeph>BIGINT</codeph> + value. The behavior of <codeph>TIMESTAMP</codeph> for Kudu tables has some special considerations: + + <ul> + <li> + <p> + Any nanoseconds in the original 96-bit value produced by Impala are not stored, because + Kudu represents date/time columns using 64-bit values. The nanosecond portion of the value + is rounded, not truncated. Therefore, a <codeph>TIMESTAMP</codeph> value + that you store in a Kudu table might not be bit-for-bit identical to the value returned by a query. + </p> + </li> + <li> + <p> + The conversion between the Impala 96-bit representation and the Kudu 64-bit representation + introduces some performance overhead when reading or writing <codeph>TIMESTAMP</codeph> + columns. You can minimize the overhead during writes by performing inserts through the + Kudu API. Because the overhead during reads applies to each query, you might continue to + use a <codeph>BIGINT</codeph> column to represent date/time values in performance-critical + applications. + </p> + </li> + <li> + <p> + The Impala <codeph>TIMESTAMP</codeph> type has a narrower range for years than the underlying + Kudu data type. Impala can represent years 1400-9999. If year values outside this range + are written to a Kudu table by a non-Impala client, Impala returns <codeph>NULL</codeph> + by default when reading those <codeph>TIMESTAMP</codeph> values during a query. Or, if the + <codeph>ABORT_ON_ERROR</codeph> query option is enabled, the query fails when it encounters + a value with an out-of-range year. + </p> + </li> + </ul> + </p> + </section> </conbody> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/621af4b7/docs/topics/impala_authorization.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_authorization.xml b/docs/topics/impala_authorization.xml index f82a790..fe5faa9 100644 --- a/docs/topics/impala_authorization.xml +++ b/docs/topics/impala_authorization.xml @@ -825,8 +825,7 @@ sales = hdfs://ha-nn-uri/etc/access/sales.ini <p> To enable URIs in per-DB policy files, the Java configuration option <codeph>sentry.allow.uri.db.policyfile</codeph> - must be set to <codeph>true</codeph>. - For example: + must be set to <codeph>true</codeph>. For example: </p> <codeblock>JAVA_TOOL_OPTIONS="-Dsentry.allow.uri.db.policyfile=true" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/621af4b7/docs/topics/impala_datetime_functions.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_datetime_functions.xml b/docs/topics/impala_datetime_functions.xml index 54ae475..da5d72a 100644 --- a/docs/topics/impala_datetime_functions.xml +++ b/docs/topics/impala_datetime_functions.xml @@ -1618,6 +1618,8 @@ with t1 as (select trunc(now(), 'dd') as today) <p> <b>Return type:</b> <codeph>timestamp</codeph> </p> + <p conref="../shared/impala_common.xml#common/kudu_blurb"/> + <p conref="../shared/impala_common.xml#common/kudu_timestamp_nanoseconds_caveat"/> <p conref="../shared/impala_common.xml#common/example_blurb"/> <codeblock> select now() as right_now, nanoseconds_add(now(), 1) as 1_nanosecond_later; @@ -1652,6 +1654,8 @@ select now() as right_now, nanoseconds_add(now(), 1e9) as 1_second_later; <p> <b>Return type:</b> <codeph>timestamp</codeph> </p> + <p conref="../shared/impala_common.xml#common/kudu_blurb"/> + <p conref="../shared/impala_common.xml#common/kudu_timestamp_nanoseconds_caveat"/> <codeblock> select now() as right_now, nanoseconds_sub(now(), 1) as 1_nanosecond_earlier; +-------------------------------+-------------------------------+ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/621af4b7/docs/topics/impala_kudu.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_kudu.xml b/docs/topics/impala_kudu.xml index 728d1fc..3334d6b 100644 --- a/docs/topics/impala_kudu.xml +++ b/docs/topics/impala_kudu.xml @@ -963,9 +963,36 @@ partition by hash (school) partitions 10, <conbody> + <p conref="../shared/impala_common.xml#common/kudu_timestamp_details"/> + +<codeblock rev="2.9.0 IMPALA-5137"><![CDATA[--- Make a table representing a date/time value as TIMESTAMP. +-- The strings representing the partition bounds are automatically +-- cast to TIMESTAMP values. +create table native_timestamp(id bigint, when_exactly timestamp, event string, primary key (id, when_exactly)) + partition by hash (id) partitions 20, + range (when_exactly) + ( + partition '2015-01-01' <= values < '2016-01-01', + partition '2016-01-01' <= values < '2017-01-01', + partition '2017-01-01' <= values < '2018-01-01' + ) + stored as kudu; + +insert into native_timestamp values (12345, now(), 'Working on doc examples'); + +select * from native_timestamp; ++-------+-------------------------------+-------------------------+ +| id | when_exactly | event | ++-------+-------------------------------+-------------------------+ +| 12345 | 2017-05-31 16:27:42.667542000 | Working on doc examples | ++-------+-------------------------------+-------------------------+ +]]> +</codeblock> + <p> - Because currently a Kudu table cannot have a column with the Impala - <codeph>TIMESTAMP</codeph> type, expect to store date/time information as the number + Because Kudu tables have some performance overhead to convert <codeph>TIMESTAMP</codeph> + columns to the Impala 96-bit internal representation, for performance-critical + applications you might store date/time information as the number of seconds, milliseconds, or microseconds since the Unix epoch date of January 1, 1970. Specify the column as <codeph>BIGINT</codeph> in the Impala <codeph>CREATE TABLE</codeph> statement, corresponding to an 8-byte integer (an @@ -1013,14 +1040,14 @@ select unix_timestamp('2017-01-01'); -- Construct 1 range partition and 20 associated hash partitions for each year. -- Use date/time conversion functions to express the ranges as human-readable dates. create table time_series(id bigint, when_exactly bigint, event string, primary key (id, when_exactly)) - partition by hash (id) partitions 20, - range (when_exactly) - ( - partition unix_timestamp('2015-01-01') <= values < unix_timestamp('2016-01-01'), - partition unix_timestamp('2016-01-01') <= values < unix_timestamp('2017-01-01'), - partition unix_timestamp('2017-01-01') <= values < unix_timestamp('2018-01-01') - ) - stored as kudu; + partition by hash (id) partitions 20, + range (when_exactly) + ( + partition unix_timestamp('2015-01-01') <= values < unix_timestamp('2016-01-01'), + partition unix_timestamp('2016-01-01') <= values < unix_timestamp('2017-01-01'), + partition unix_timestamp('2017-01-01') <= values < unix_timestamp('2018-01-01') + ) + stored as kudu; -- On insert, we can transform a human-readable date/time into a numeric value. insert into time_series values (12345, unix_timestamp('2017-01-25 23:24:56'), 'Working on doc examples'); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/621af4b7/docs/topics/impala_timestamp.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_timestamp.xml b/docs/topics/impala_timestamp.xml index 1f37f30..94e9c69 100644 --- a/docs/topics/impala_timestamp.xml +++ b/docs/topics/impala_timestamp.xml @@ -384,26 +384,6 @@ Fetched 2 row(s) in 0.22s ERROR: AnalysisException: Type 'TIMESTAMP' is not supported as partition-column type in column: happened </codeblock> - <p conref="../shared/impala_common.xml#common/example_blurb"/> - -<codeblock>select cast('1966-07-30' as timestamp); -select cast('1985-09-25 17:45:30.005' as timestamp); -select cast('08:30:00' as timestamp); -select hour('1970-01-01 15:30:00'); -- Succeeds, returns 15. -select hour('1970-01-01 15:30'); -- Returns NULL because seconds field required. -select hour('1970-01-01 27:30:00'); -- Returns NULL because hour value out of range. -select dayofweek('2004-06-13'); -- Returns 1, representing Sunday. -select dayname('2004-06-13'); -- Returns 'Sunday'. -select date_add('2004-06-13', 365); -- Returns 2005-06-13 with zeros for hh:mm:ss fields. -select day('2004-06-13'); -- Returns 13. -select datediff('1989-12-31','1984-09-01'); -- How many days between these 2 dates? -select now(); -- Returns current date and time in local timezone. - -create table dates_and_times (t timestamp); -insert into dates_and_times values - ('1966-07-30'), ('1985-09-25 17:45:30.005'), ('08:30:00'), (now()); -</codeblock> - <p conref="../shared/impala_common.xml#common/null_bad_timestamp_cast"/> <p conref="../shared/impala_common.xml#common/partitioning_worrisome"/> @@ -437,7 +417,74 @@ insert into dates_and_times values <p conref="../shared/impala_common.xml#common/avro_no_timestamp"/> <p conref="../shared/impala_common.xml#common/kudu_blurb"/> - <p conref="../shared/impala_common.xml#common/kudu_unsupported_data_type"/> + <p conref="../shared/impala_common.xml#common/kudu_timestamp_details"/> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p> + The following examples demonstrate using <codeph>TIMESTAMP</codeph> values + with built-in functions: + </p> + +<codeblock>select cast('1966-07-30' as timestamp); +select cast('1985-09-25 17:45:30.005' as timestamp); +select cast('08:30:00' as timestamp); +select hour('1970-01-01 15:30:00'); -- Succeeds, returns 15. +select hour('1970-01-01 15:30'); -- Returns NULL because seconds field required. +select hour('1970-01-01 27:30:00'); -- Returns NULL because hour value out of range. +select dayofweek('2004-06-13'); -- Returns 1, representing Sunday. +select dayname('2004-06-13'); -- Returns 'Sunday'. +select date_add('2004-06-13', 365); -- Returns 2005-06-13 with zeros for hh:mm:ss fields. +select day('2004-06-13'); -- Returns 13. +select datediff('1989-12-31','1984-09-01'); -- How many days between these 2 dates? +select now(); -- Returns current date and time in local timezone. +</codeblock> + + <p> + The following examples demonstrate using <codeph>TIMESTAMP</codeph> values + with HDFS-backed tables: + </p> + +<codeblock>create table dates_and_times (t timestamp); +insert into dates_and_times values + ('1966-07-30'), ('1985-09-25 17:45:30.005'), ('08:30:00'), (now()); +</codeblock> + + <p rev="IMPALA-5137"> + The following examples demonstrate using <codeph>TIMESTAMP</codeph> values + with Kudu tables: + </p> + +<codeblock rev="IMPALA-5137">create table timestamp_t (x int primary key, s string, t timestamp, b bigint) + partition by hash (x) partitions 16 + stored as kudu; + +-- The default value of now() has microsecond precision, so the final 3 digits +-- representing nanoseconds are all zero. +insert into timestamp_t values (1, cast(now() as string), now(), unix_timestamp(now())); + +-- Values with 1-499 nanoseconds are rounded down in the Kudu TIMESTAMP column. +insert into timestamp_t values (2, cast(now() + interval 100 nanoseconds as string), now() + interval 100 nanoseconds, unix_timestamp(now() + interval 100 nanoseconds)); +insert into timestamp_t values (3, cast(now() + interval 499 nanoseconds as string), now() + interval 499 nanoseconds, unix_timestamp(now() + interval 499 nanoseconds)); + +-- Values with 500-999 nanoseconds are rounded up in the Kudu TIMESTAMP column. +insert into timestamp_t values (4, cast(now() + interval 500 nanoseconds as string), now() + interval 500 nanoseconds, unix_timestamp(now() + interval 500 nanoseconds)); +insert into timestamp_t values (5, cast(now() + interval 501 nanoseconds as string), now() + interval 501 nanoseconds, unix_timestamp(now() + interval 501 nanoseconds)); + +-- The string representation shows how underlying Impala TIMESTAMP can have nanosecond precision. +-- The TIMESTAMP column shows how timestamps in a Kudu table are rounded to microsecond precision. +-- The BIGINT column represents seconds past the epoch and so if not affected much by nanoseconds. +select s, t, b from timestamp_t order by t; ++-------------------------------+-------------------------------+------------+ +| s | t | b | ++-------------------------------+-------------------------------+------------+ +| 2017-05-31 15:30:05.107157000 | 2017-05-31 15:30:05.107157000 | 1496244605 | +| 2017-05-31 15:30:28.868151100 | 2017-05-31 15:30:28.868151000 | 1496244628 | +| 2017-05-31 15:34:33.674692499 | 2017-05-31 15:34:33.674692000 | 1496244873 | +| 2017-05-31 15:35:04.769166500 | 2017-05-31 15:35:04.769167000 | 1496244904 | +| 2017-05-31 15:35:33.033082501 | 2017-05-31 15:35:33.033083000 | 1496244933 | ++-------------------------------+-------------------------------+------------+ +</codeblock> <p conref="../shared/impala_common.xml#common/related_info"/>
