http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_show.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_show.xml b/docs/topics/impala_show.xml index 1e8c17d..dd60d82 100644 --- a/docs/topics/impala_show.xml +++ b/docs/topics/impala_show.xml @@ -2,11 +2,13 @@ <concept id="show"> <title>SHOW Statement</title> - <titlealts><navtitle>SHOW</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>SHOW</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> <data name="Category" value="SQL"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> <data name="Category" value="Reports"/> </metadata> </prolog> @@ -128,7 +130,7 @@ show tables '*dim*|*fact*';</codeblock> <p conref="../shared/impala_common.xml#common/permissions_blurb"/> <p rev="CDH-19187"> The user ID that the <cmdname>impalad</cmdname> daemon runs under, - typically the <codeph>impala</codeph> user, must have read + typically the <codeph>impala</codeph> user, must have read permission for all the table files, read and execute permission for all the directories that make up the table, and execute permission for the database directory and all its parent directories. </p> @@ -136,26 +138,27 @@ show tables '*dim*|*fact*';</codeblock> <p conref="../shared/impala_common.xml#common/example_blurb"/> <p> - The following example constructs <codeph>SHOW FILES</codeph> statements - for an unpartitioned tables using text format: + The following example shows a <codeph>SHOW FILES</codeph> statement + for an unpartitioned table using text format: </p> -<codeblock>[localhost:21000] > create table unpartitioned_text (x bigint, s string); -[localhost:21000] > insert into unpartitioned_text (x, s) select id, name from oreilly.sample_data limit 20e6; -[localhost:21000] > show files in unpartitioned_text; -+-------------------------------------------------------------------------------------+----------+-----------+ -| path | size | partition | -+-------------------------------------------------------------------------------------+----------+-----------+ -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/35665776ef85cfaf_1012432410_data.0. | 448.31MB | | -+-------------------------------------------------------------------------------------+----------+-----------+ -[localhost:21000] > insert into unpartitioned_text (x, s) select id, name from oreilly.sample_data limit 100e6; -[localhost:21000] > show files in unpartitioned_text; -+---------------------------------------------------------------------------------------------+----------+-----------+ -| path | size | partition | -+---------------------------------------------------------------------------------------------+----------+-----------+ -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/35665776ef85cfaf_1012432410_data.0. | 448.31MB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/ac3dba252a8952b8_1663177415_data.0. 
| 2.19GB | | -+---------------------------------------------------------------------------------------------+----------+-----------+ +<codeblock scale="60">[localhost:21000] > create table unpart_text (x bigint, s string); +[localhost:21000] > insert into unpart_text (x, s) select id, name + > from oreilly.sample_data limit 20e6; +[localhost:21000] > show files in unpart_text; ++------------------------------------------------------------------------------+----------+-----------+ +| path | size | partition | ++------------------------------------------------------------------------------+----------+-----------+ +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_text/35665776ef85cfaf_1012432410_data.0. | 448.31MB | | ++------------------------------------------------------------------------------+----------+-----------+ +[localhost:21000] > insert into unpart_text (x, s) select id, name from oreilly.sample_data limit 100e6; +[localhost:21000] > show files in unpart_text; ++--------------------------------------------------------------------------------------+----------+-----------+ +| path | size | partition | ++--------------------------------------------------------------------------------------+----------+-----------+ +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_text/35665776ef85cfaf_1012432410_data.0. | 448.31MB | | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_text/ac3dba252a8952b8_1663177415_data.0. | 2.19GB | | ++--------------------------------------------------------------------------------------+----------+-----------+ </codeblock> <p> @@ -165,36 +168,36 @@ show tables '*dim*|*fact*';</codeblock> an <codeph>INSERT ... SELECT</codeph> into a different table, possibly converting the data to Parquet in the process: </p> -<codeblock>[localhost:21000] > insert into unpartitioned_text values (10,'hello'), (20, 'world'); -[localhost:21000] > insert into unpartitioned_text values (-1,'foo'), (-1000, 'bar'); -[localhost:21000] > show files in unpartitioned_text; -+---------------------------------------------------------------------------------------------+----------+-----------+ -| path | size | partition | -+---------------------------------------------------------------------------------------------+----------+-----------+ -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/4f11b8bdf8b6aa92_238145083_data.0. | 18B | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/35665776ef85cfaf_1012432410_data.0. | 448.31MB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/ac3dba252a8952b8_1663177415_data.0. | 2.19GB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/cfb8252452445682_1868457216_data.0. 
| 17B | | -+---------------------------------------------------------------------------------------------+----------+-----------+ -[localhost:21000] > create table unpartitioned_parquet stored as parquet as select * from unpartitioned_text; +<codeblock scale="60">[localhost:21000] > insert into unpart_text values (10,'hello'), (20, 'world'); +[localhost:21000] > insert into unpart_text values (-1,'foo'), (-1000, 'bar'); +[localhost:21000] > show files in unpart_text; ++--------------------------------------------------------------------------------------+----------+ +| path | size | ++--------------------------------------------------------------------------------------+----------+ +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_text/4f11b8bdf8b6aa92_238145083_data.0. | 18B +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_text/35665776ef85cfaf_1012432410_data.0. | 448.31MB +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_text/ac3dba252a8952b8_1663177415_data.0. | 2.19GB +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_text/cfb8252452445682_1868457216_data.0. | 17B ++--------------------------------------------------------------------------------------+----------+ +[localhost:21000] > create table unpart_parq stored as parquet as select * from unpart_text; +---------------------------+ | summary | +---------------------------+ | Inserted 120000002 row(s) | +---------------------------+ -[localhost:21000] > show files in unpartitioned_parquet; -+----------------------------------------------------------------------------------------------------+----------+-----------+ -| path | size | partition | -+----------------------------------------------------------------------------------------------------+----------+-----------+ -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630184_549959007_data.0.parq | 255.36MB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630184_549959007_data.1.parq | 178.52MB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630185_549959007_data.0.parq | 255.37MB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630185_549959007_data.1.parq | 57.71MB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630186_2141167244_data.0.parq | 255.40MB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630186_2141167244_data.1.parq | 175.52MB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630187_1006832086_data.0.parq | 255.40MB | | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630187_1006832086_data.1.parq | 214.61MB | | -+----------------------------------------------------------------------------------------------------+----------+-----------+ +[localhost:21000] > show files in unpart_parq; ++------------------------------------------------------------------------------------------+----------+ +| path | size | ++------------------------------------------------------------------------------------------+----------+ +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_parq/60798d96ba630184_549959007_data.0.parq | 255.36MB | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_parq/60798d96ba630184_549959007_data.1.parq | 178.52MB | +| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_parq/60798d96ba630185_549959007_data.0.parq | 255.37MB | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_parq/60798d96ba630185_549959007_data.1.parq | 57.71MB | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_parq/60798d96ba630186_2141167244_data.0.parq | 255.40MB | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_parq/60798d96ba630186_2141167244_data.1.parq | 175.52MB | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_parq/60798d96ba630187_1006832086_data.0.parq | 255.40MB | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/unpart_parq/60798d96ba630187_1006832086_data.1.parq | 214.61MB | ++------------------------------------------------------------------------------------------+----------+ </codeblock> <p> @@ -202,14 +205,17 @@ show tables '*dim*|*fact*';</codeblock> with data in two different partitions, and two empty partitions. The partitions with no data are not represented in the <codeph>SHOW FILES</codeph> output. </p> -<codeblock>[localhost:21000] > create table partitioned_text (x bigint, y int, s string) partitioned by (year bigint, month bigint, day bigint); -[localhost:21000] > insert overwrite partitioned_text (x, y, s) partition (year=2014,month=1,day=1) select id, val, name from oreilly.normalized_parquet +<codeblock scale="60">[localhost:21000] > create table part_text (x bigint, y int, s string) + > partitioned by (year bigint, month bigint, day bigint); +[localhost:21000] > insert overwrite part_text (x, y, s) partition (year=2014,month=1,day=1) + > select id, val, name from oreilly.normalized_parquet where id between 1 and 1000000; -[localhost:21000] > insert overwrite partitioned_text (x, y, s) partition (year=2014,month=1,day=2) select id, val, name from oreilly.normalized_parquet -where id between 1000001 and 2000000; -[localhost:21000] > alter table partitioned_text add partition (year=2014,month=1,day=3); -[localhost:21000] > alter table partitioned_text add partition (year=2014,month=1,day=4); -[localhost:21000] > show partitions partitioned_text; +[localhost:21000] > insert overwrite part_text (x, y, s) partition (year=2014,month=1,day=2) + > select id, val, name from oreilly.normalized_parquet + > where id between 1000001 and 2000000; +[localhost:21000] > alter table part_text add partition (year=2014,month=1,day=3); +[localhost:21000] > alter table part_text add partition (year=2014,month=1,day=4); +[localhost:21000] > show partitions part_text; +-------+-------+-----+-------+--------+---------+--------------+-------------------+--------+-------------------+ | year | month | day | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | Incremental stats | +-------+-------+-----+-------+--------+---------+--------------+-------------------+--------+-------------------+ @@ -219,19 +225,19 @@ where id between 1000001 and 2000000; | 2014 | 1 | 4 | -1 | 0 | 0B | NOT CACHED | NOT CACHED | TEXT | false | | Total | | | -1 | 8 | 51.38MB | 0B | | | | +-------+-------+-----+-------+--------+---------+--------------+-------------------+--------+-------------------+ -[localhost:21000] > show files in partitioned_text; -+----------------------------------------------------------------------------------------------------------------+--------+-------------------------+ -| path | size | partition | 
-+----------------------------------------------------------------------------------------------------------------+--------+-------------------------+ -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc80689f_1418645991_data.0. | 5.77MB | year=2014/month=1/day=1 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc8068a0_1418645991_data.0. | 6.25MB | year=2014/month=1/day=1 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc8068a1_147082319_data.0. | 7.16MB | year=2014/month=1/day=1 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc8068a2_2111411753_data.0. | 5.98MB | year=2014/month=1/day=1 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbb_501271652_data.0. | 6.42MB | year=2014/month=1/day=2 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbc_501271652_data.0. | 6.62MB | year=2014/month=1/day=2 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbd_1393490200_data.0. | 6.98MB | year=2014/month=1/day=2 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbe_1393490200_data.0. | 6.20MB | year=2014/month=1/day=2 | -+----------------------------------------------------------------------------------------------------------------+--------+-------------------------+ +[localhost:21000] > show files in part_text; ++---------------------------------------------------------------------------------------------------------+--------+-------------------------+ +| path | size | partition | ++---------------------------------------------------------------------------------------------------------+--------+-------------------------+ +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_text/year=2014/month=1/day=1/80732d9dc80689f_1418645991_data.0. | 5.77MB | year=2014/month=1/day=1 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_text/year=2014/month=1/day=1/80732d9dc8068a0_1418645991_data.0. | 6.25MB | year=2014/month=1/day=1 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_text/year=2014/month=1/day=1/80732d9dc8068a1_147082319_data.0. | 7.16MB | year=2014/month=1/day=1 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_text/year=2014/month=1/day=1/80732d9dc8068a2_2111411753_data.0. | 5.98MB | year=2014/month=1/day=1 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_text/year=2014/month=1/day=2/21a828cf494b5bbb_501271652_data.0. | 6.42MB | year=2014/month=1/day=2 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_text/year=2014/month=1/day=2/21a828cf494b5bbc_501271652_data.0. | 6.62MB | year=2014/month=1/day=2 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_text/year=2014/month=1/day=2/21a828cf494b5bbd_1393490200_data.0. | 6.98MB | year=2014/month=1/day=2 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_text/year=2014/month=1/day=2/21a828cf494b5bbe_1393490200_data.0. 
| 6.20MB | year=2014/month=1/day=2 | ++---------------------------------------------------------------------------------------------------------+--------+-------------------------+ </codeblock> <p> The following example shows a <codeph>SHOW FILES</codeph> statement for a partitioned Parquet table. @@ -240,9 +246,9 @@ where id between 1000001 and 2000000; are parallelized differently than for text tables. (Also, the amount of data is so small that it can be written to Parquet without involving all the hosts in this 4-node cluster.) </p> -<codeblock>[localhost:21000] > create table partitioned_parquet (x bigint, y int, s string) partitioned by (year bigint, month bigint, day bigint) stored as parquet; -[localhost:21000] > insert into partitioned_parquet partition (year,month,day) select x, y, s, year, month, day from partitioned_text; -[localhost:21000] > show partitions partitioned_parquet; +<codeblock scale="60">[localhost:21000] > create table part_parq (x bigint, y int, s string) partitioned by (year bigint, month bigint, day bigint) stored as parquet; +[localhost:21000] > insert into part_parq partition (year,month,day) select x, y, s, year, month, day from partitioned_text; +[localhost:21000] > show partitions part_parq; +-------+-------+-----+-------+--------+---------+--------------+-------------------+---------+-------------------+ | year | month | day | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | Incremental stats | +-------+-------+-----+-------+--------+---------+--------------+-------------------+---------+-------------------+ @@ -250,28 +256,28 @@ where id between 1000001 and 2000000; | 2014 | 1 | 2 | -1 | 3 | 17.89MB | NOT CACHED | NOT CACHED | PARQUET | false | | Total | | | -1 | 6 | 35.79MB | 0B | | | | +-------+-------+-----+-------+--------+---------+--------------+-------------------+---------+-------------------+ -[localhost:21000] > show files in partitioned_parquet; -+---------------------------------------------------------------------------------------------------------+--------+-------------------------+ -| path | size | partition | -+---------------------------------------------------------------------------------------------------------+--------+-------------------------+ -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=1/1134113650_data.0.parq | 4.49MB | year=2014/month=1/day=1 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=1/617567880_data.0.parq | 5.14MB | year=2014/month=1/day=1 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=1/2099499416_data.0.parq | 8.27MB | year=2014/month=1/day=1 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=2/945567189_data.0.parq | 8.80MB | year=2014/month=1/day=2 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=2/2145850112_data.0.parq | 4.80MB | year=2014/month=1/day=2 | -| hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=2/665613448_data.0.parq | 4.29MB | year=2014/month=1/day=2 | -+---------------------------------------------------------------------------------------------------------+--------+-------------------------+ +[localhost:21000] > show files in part_parq; 
++-----------------------------------------------------------------------------------------------+--------+-------------------------+ +| path | size | partition | ++-----------------------------------------------------------------------------------------------+--------+-------------------------+ +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_parq/year=2014/month=1/day=1/1134113650_data.0.parq | 4.49MB | year=2014/month=1/day=1 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_parq/year=2014/month=1/day=1/617567880_data.0.parq | 5.14MB | year=2014/month=1/day=1 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_parq/year=2014/month=1/day=1/2099499416_data.0.parq | 8.27MB | year=2014/month=1/day=1 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_parq/year=2014/month=1/day=2/945567189_data.0.parq | 8.80MB | year=2014/month=1/day=2 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_parq/year=2014/month=1/day=2/2145850112_data.0.parq | 4.80MB | year=2014/month=1/day=2 | +| hdfs://<varname>impala_data_dir</varname>/show_files.db/part_parq/year=2014/month=1/day=2/665613448_data.0.parq | 4.29MB | year=2014/month=1/day=2 | ++-----------------------------------------------------------------------------------------------+--------+-------------------------+ </codeblock> <p> The following example shows output from the <codeph>SHOW FILES</codeph> statement for a table where the data files are stored in Amazon S3: </p> -<codeblock>[localhost:21000] > show files in s3_testing.sample_data_s3; -+-----------------------------------------------------------------------+---------+-----------+ -| path | size | partition | -+-----------------------------------------------------------------------+---------+-----------+ -| s3a://impala-demo/sample_data/e065453cba1988a6_1733868553_data.0.parq | 24.84MB | | -+-----------------------------------------------------------------------+---------+-----------+ +<codeblock scale="70">[localhost:21000] > show files in s3_testing.sample_data_s3; ++-----------------------------------------------------------------------+---------+ +| path | size | ++-----------------------------------------------------------------------+---------+ +| s3a://impala-demo/sample_data/e065453cba1988a6_1733868553_data.0.parq | 24.84MB | ++-----------------------------------------------------------------------+---------+ </codeblock> <!-- <p conref="../shared/impala_common.xml#common/related_info"/> @@ -487,6 +493,11 @@ where id between 1000001 and 2000000; <codeph>INSERT</codeph> statements. </p> + <p rev="2.5.0 IMPALA-2070"> + In CDH 5.7 / Impala 2.5 and higher, the output includes a second column showing any associated comment + for each database. + </p> + <p> The output of <codeph>SHOW DATABASES</codeph> includes the special <codeph>_impala_builtins</codeph> database, which lets you view definitions of built-in functions, as described under <codeph>SHOW @@ -506,27 +517,15 @@ where id between 1000001 and 2000000; into a database, or <codeph>SHOW TABLES</codeph> once you are inside a particular database. 
</p> -<codeblock>[localhost:21000] > show databases; -+--------------------+ -| name | -+--------------------+ -| _impala_builtins | -| analyze_testing | -| avro | -| ctas | -| d1 | -| d2 | -| d3 | -| default | -| file_formats | -| hbase | -| load_data | -| partitioning | -| regexp_testing | -| reports | -| temporary | -+--------------------+ -Returned 14 row(s) in 0.02s +<codeblock rev="2.5.0 IMPALA-2070">[localhost:21000] > show databases; ++------------------+----------------------------------------------+ +| name | comment | ++------------------+----------------------------------------------+ +| _impala_builtins | System database for Impala builtin functions | +| default | Default Hive database | +| file_formats | | ++------------------+----------------------------------------------+ +Returned 3 row(s) in 0.02s [localhost:21000] > show tables in file_formats; +--------------------+ | name | @@ -1176,6 +1175,21 @@ show column stats store_sales; several UDFs with the same name, each accepting different argument data types. </p> + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p rev="2.5.0"> + In CDH 5.7 / Impala 2.5 and higher, the <codeph>SHOW FUNCTIONS</codeph> output includes + a new column, labelled <codeph>is persistent</codeph>. This property is <codeph>true</codeph> for + Impala built-in functions, C++ UDFs, and Java UDFs created using the new <codeph>CREATE FUNCTION</codeph> + syntax with no signature. It is <codeph>false</codeph> for Java UDFs created using the old + <codeph>CREATE FUNCTION</codeph> syntax that includes the types for the arguments and return value. + Any functions with <codeph>false</codeph> shown for this property must be created again by the + <codeph>CREATE FUNCTION</codeph> statement each time the Impala catalog server is restarted. + See <codeph>CREATE FUNCTION</codeph> for information on switching to the new syntax, so that + Java UDFs are preserved across restarts. Java UDFs that are persisted this way are also easier + to share across Impala and Hive. + </p> + <p conref="../shared/impala_common.xml#common/security_blurb"/> <p conref="../shared/impala_common.xml#common/show_security"/> @@ -1188,43 +1202,30 @@ show column stats store_sales; To display Impala built-in functions, specify the special database name <codeph>_impala_builtins</codeph>: </p> -<codeblock>show functions in _impala_builtins; +<codeblock rev="2.5.0">show functions in _impala_builtins; ++--------------+-------------------------------------------------+-------------+---------------+ +| return type | signature | binary type | is persistent | ++--------------+-------------------------------------------------+-------------+---------------+ +| BIGINT | abs(BIGINT) | BUILTIN | true | +| DECIMAL(*,*) | abs(DECIMAL(*,*)) | BUILTIN | true | +| DOUBLE | abs(DOUBLE) | BUILTIN | true | +| FLOAT | abs(FLOAT) | BUILTIN | true | +----------------+----------------------------------------+ -| return type | signature | -+----------------+----------------------------------------+ -| BOOLEAN | ifnull(BOOLEAN, BOOLEAN) | -| TINYINT | ifnull(TINYINT, TINYINT) | -| SMALLINT | ifnull(SMALLINT, SMALLINT) | -| INT | ifnull(INT, INT) | ... 
show functions in _impala_builtins like '*week*'; -+-------------+------------------------------+ -| return type | signature | -+-------------+------------------------------+ -| INT | weekofyear(TIMESTAMP) | -| TIMESTAMP | weeks_add(TIMESTAMP, INT) | -| TIMESTAMP | weeks_add(TIMESTAMP, BIGINT) | -| TIMESTAMP | weeks_sub(TIMESTAMP, INT) | -| TIMESTAMP | weeks_sub(TIMESTAMP, BIGINT) | -| INT | dayofweek(TIMESTAMP) | -+-------------+------------------------------+ ++-------------+------------------------------+-------------+---------------+ +| return type | signature | binary type | is persistent | ++-------------+------------------------------+-------------+---------------+ +| INT | dayofweek(TIMESTAMP) | BUILTIN | true | +| INT | weekofyear(TIMESTAMP) | BUILTIN | true | +| TIMESTAMP | weeks_add(TIMESTAMP, BIGINT) | BUILTIN | true | +| TIMESTAMP | weeks_add(TIMESTAMP, INT) | BUILTIN | true | +| TIMESTAMP | weeks_sub(TIMESTAMP, BIGINT) | BUILTIN | true | +| TIMESTAMP | weeks_sub(TIMESTAMP, INT) | BUILTIN | true | ++-------------+------------------------------+-------------+---------------+ </codeblock> - <p> - To search for functions that use a particular data type, specify a case-sensitive data type name in all - capitals: - </p> - -<codeblock>show functions in _impala_builtins like '*BIGINT*'; -+----------------------------------------+ -| name | -+----------------------------------------+ -| adddate(TIMESTAMP, BIGINT) | -| bin(BIGINT) | -| coalesce(BIGINT...) | -...</codeblock> - <p conref="../shared/impala_common.xml#common/related_info"/> <p>
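As a point of reference for the "is persistent" note in the SHOW FUNCTIONS hunk above, the signature-less CREATE FUNCTION form it mentions looks roughly like the following — a minimal sketch only, with a hypothetical JAR path and class name that are not part of this commit:

-- Hypothetical Java UDF registered with the signature-less syntax referenced above;
-- functions created this way report is persistent = true and survive catalog restarts.
CREATE FUNCTION my_java_udfs
  LOCATION '/user/impala/udfs/my-udfs.jar'
  SYMBOL='com.example.udf.MyFunction';

-- The older per-signature form, by contrast, is the one reported with is persistent = false:
-- CREATE FUNCTION my_lower(STRING) RETURNS STRING
--   LOCATION '/user/impala/udfs/my-udfs.jar' SYMBOL='com.example.udf.MyLower';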
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_smallint.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_smallint.xml b/docs/topics/impala_smallint.xml index 3aae9ad..1ed95e6 100644 --- a/docs/topics/impala_smallint.xml +++ b/docs/topics/impala_smallint.xml @@ -3,7 +3,7 @@ <concept id="smallint"> <title>SMALLINT Data Type</title> - <titlealts><navtitle>SMALLINT</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>SMALLINT</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> @@ -79,7 +79,7 @@ SELECT CAST(1000 AS SMALLINT); <p conref="../shared/impala_common.xml#common/text_bulky"/> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> --> +<!-- <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> --> <p conref="../shared/impala_common.xml#common/internals_2_bytes"/> @@ -87,7 +87,7 @@ SELECT CAST(1000 AS SMALLINT); <p conref="../shared/impala_common.xml#common/column_stats_constant"/> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> --> +<!-- <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> --> <p conref="../shared/impala_common.xml#common/related_info"/> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_stddev.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_stddev.xml b/docs/topics/impala_stddev.xml index 0cdff45..9cd4126 100644 --- a/docs/topics/impala_stddev.xml +++ b/docs/topics/impala_stddev.xml @@ -3,7 +3,7 @@ <concept rev="1.4" id="stddev"> <title>STDDEV, STDDEV_SAMP, STDDEV_POP Functions</title> - <titlealts><navtitle>STDDEV, STDDEV_SAMP, STDDEV_POP</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>STDDEV, STDDEV_SAMP, STDDEV_POP</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> @@ -11,6 +11,8 @@ <data name="Category" value="Impala Functions"/> <data name="Category" value="Aggregate Functions"/> <data name="Category" value="Querying"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> </metadata> </prolog> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_string.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_string.xml b/docs/topics/impala_string.xml index 9ad77c3..1be9d98 100644 --- a/docs/topics/impala_string.xml +++ b/docs/topics/impala_string.xml @@ -3,7 +3,7 @@ <concept id="string"> <title>STRING Data Type</title> - <titlealts><navtitle>STRING</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>STRING</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> @@ -41,9 +41,27 @@ <p> <b>Character sets:</b> For full support in all Impala subsystems, restrict string values to the ASCII - character set. UTF-8 character data can be stored in Impala and retrieved through queries, but UTF-8 strings - containing non-ASCII characters are not guaranteed to work properly with string manipulation functions, - comparison operators, or the <codeph>ORDER BY</codeph> clause. For any national language aspects such as + character set. 
Although some UTF-8 character data can be stored in Impala and retrieved through queries, UTF-8 strings + containing non-ASCII characters are not guaranteed to work properly in combination with many SQL aspects, + including but not limited to: + </p> + <ul> + <li> + String manipulation functions. + </li> + <li> + Comparison operators. + </li> + <li> + The <codeph>ORDER BY</codeph> clause. + </li> + <li> + Values in partition key columns. + </li> + </ul> + + <p> + For any national language aspects such as collation order or interpreting extended ASCII variants such as ISO-8859-1 or ISO-8859-2 encodings, Impala does not include such metadata with the table definition. If you need to sort, manipulate, or display data depending on those national language characteristics of string data, use logic on the application side. @@ -101,21 +119,22 @@ <p conref="../shared/impala_common.xml#common/zero_length_strings"/> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/hbase_blurb"/> --> +<!-- <p conref="../shared/impala_common.xml#common/hbase_blurb"/> --> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/parquet_blurb"/> --> +<!-- <p conref="../shared/impala_common.xml#common/parquet_blurb"/> --> <p conref="../shared/impala_common.xml#common/text_bulky"/> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> --> + <p><b>Avro considerations:</b></p> + <p conref="../shared/impala_common.xml#common/avro_2gb_strings"/> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/internals_blurb"/> --> +<!-- <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> --> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> --> +<!-- <p conref="../shared/impala_common.xml#common/internals_blurb"/> --> - <p conref="../shared/impala_common.xml#common/column_stats_variable"/> +<!-- <p conref="../shared/impala_common.xml#common/added_in_20"/> --> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> --> + <p conref="../shared/impala_common.xml#common/column_stats_variable"/> <p conref="../shared/impala_common.xml#common/example_blurb"/> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_string_functions.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_string_functions.xml b/docs/topics/impala_string_functions.xml index a051ed5..8f8636f 100644 --- a/docs/topics/impala_string_functions.xml +++ b/docs/topics/impala_string_functions.xml @@ -2,7 +2,7 @@ <concept id="string_functions"> <title>Impala String Functions</title> - <titlealts><navtitle>String Functions</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>String Functions</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> @@ -154,7 +154,7 @@ select concat('[',btrim('xyhelxyzlozyzzxx','xyz'),']'); <indexterm audience="Cloudera">chr() function</indexterm> <b>Purpose:</b> Returns a character specified by a decimal code point value. The interpretation and display of the resulting character depends on your system locale. - Because consistent processing of Impala string values is only guaranteed + Because consistent processing of Impala string values is only guaranteed for values within the ASCII range, only use this function for values corresponding to ASCII characters. In particular, parameter values greater than 255 return an empty string. 
@@ -256,6 +256,11 @@ SELECT chr(97); in the result set, or to produce multiple concatenated strings for subsets of rows, include a <codeph>GROUP BY</codeph> clause in the query. </p> + <p> + Strictly speaking, <codeph>group_concat()</codeph> is an aggregate function, not a scalar + function like the others in this list. + For additional details and examples, see <xref href="impala_group_concat.xml#group_concat"/>. + </p> </dd> </dlentry> @@ -338,6 +343,8 @@ SELECT chr(97); <p> <b>Return type:</b> <codeph>string</codeph> </p> + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + <p conref="../shared/impala_common.xml#common/case_insensitive_comparisons_tip"/> </dd> </dlentry> @@ -469,6 +476,112 @@ Returned 1 row(s) in 0.11s</codeblock> </dlentry> + <dlentry rev="2.3.0" id="regexp_like"> + + <dt> + <codeph>regexp_like(string source, string pattern[, string options])</codeph> + </dt> + + <dd> + <indexterm audience="Cloudera">regexp_like() function</indexterm> + <b>Purpose:</b> Returns <codeph>true</codeph> or <codeph>false</codeph> to indicate + whether the source string contains anywhere inside it the regular expression given by the pattern. + The optional third argument consists of letter flags that change how the match is performed, + such as <codeph>i</codeph> for case-insensitive matching. + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + <p> + The flags that you can include in the optional third argument are: + </p> + <ul> + <li> + <codeph>c</codeph>: Case-sensitive matching (the default). + </li> + <li> + <codeph>i</codeph>: Case-insensitive matching. If multiple instances of <codeph>c</codeph> and <codeph>i</codeph> + are included in the third argument, the last such option takes precedence. + </li> + <li> + <codeph>m</codeph>: Multi-line matching. The <codeph>^</codeph> and <codeph>$</codeph> + operators match the start or end of any line within the source string, not the + start and end of the entire string. + </li> + <li> + <codeph>n</codeph>: Newline matching. The <codeph>.</codeph> operator can match the + newline character. A repetition operator such as <codeph>.*</codeph> can + match a portion of the source string that spans multiple lines. + </li> + </ul> + <p> + <b>Return type:</b> <codeph>boolean</codeph> + </p> + <p conref="../shared/impala_common.xml#common/regexp_re2"/> + <p conref="../shared/impala_common.xml#common/regexp_re2_warning"/> + <p conref="../shared/impala_common.xml#common/regexp_escapes"/> + <p conref="../shared/impala_common.xml#common/example_blurb"/> + <p> + This example shows how <codeph>regexp_like()</codeph> can test for the existence + of various kinds of regular expression patterns within a source string: + </p> +<codeblock><![CDATA[ +-- Matches because the 'f' appears somewhere in 'foo'. +select regexp_like('foo','f'); ++-------------------------+ +| regexp_like('foo', 'f') | ++-------------------------+ +| true | ++-------------------------+ + +-- Does not match because the comparison is case-sensitive by default. +select regexp_like('foo','F'); ++-------------------------+ +| regexp_like('foo', 'f') | ++-------------------------+ +| false | ++-------------------------+ + +-- The 3rd argument can change the matching logic, such as 'i' meaning case-insensitive. 
+select regexp_like('foo','F','i'); ++------------------------------+ +| regexp_like('foo', 'f', 'i') | ++------------------------------+ +| true | ++------------------------------+ + +-- The familiar regular expression notations work, such as ^ and $ anchors... +select regexp_like('foo','f$'); ++--------------------------+ +| regexp_like('foo', 'f$') | ++--------------------------+ +| false | ++--------------------------+ + +select regexp_like('foo','o$'); ++--------------------------+ +| regexp_like('foo', 'o$') | ++--------------------------+ +| true | ++--------------------------+ + +-- ...and repetition operators such as * and + +select regexp_like('foooooobar','fo+b'); ++-----------------------------------+ +| regexp_like('foooooobar', 'fo+b') | ++-----------------------------------+ +| true | ++-----------------------------------+ + +select regexp_like('foooooobar','fx*y*o*b'); ++---------------------------------------+ +| regexp_like('foooooobar', 'fx*y*o*b') | ++---------------------------------------+ +| true | ++---------------------------------------+ +]]> +</codeblock> + </dd> + + </dlentry> + <dlentry id="regexp_replace"> <dt> @@ -609,6 +722,98 @@ Returned 1 row(s) in 0.12s</codeblock> </dlentry> + <dlentry rev="2.3.0 CDH-35915 IMPALA-2084" id="split_part"> + + <dt> + <codeph>split_part(string source, string delimiter, bigint n)</codeph> + </dt> + + <dd> + <indexterm audience="Cloudera">split_part() function</indexterm> + <b>Purpose:</b> Returns the nth field within a delimited string. + The fields are numbered starting from 1. + The delimiter can consist of multiple characters, not just a + single character. All matching of the delimiter is done exactly, not using any + regular expression patterns. + <p> + <b>Return type:</b> <codeph>string</codeph> + </p> + <p conref="../shared/impala_common.xml#common/regexp_re2"/> + <p conref="../shared/impala_common.xml#common/regexp_re2_warning"/> + <p conref="../shared/impala_common.xml#common/regexp_escapes"/> + <p conref="../shared/impala_common.xml#common/example_blurb"/> + <p> + These examples show how to retrieve the nth field from a delimited string: + </p> +<codeblock><![CDATA[ +select split_part('x,y,z',',',1); ++-----------------------------+ +| split_part('x,y,z', ',', 1) | ++-----------------------------+ +| x | ++-----------------------------+ + +select split_part('x,y,z',',',2); ++-----------------------------+ +| split_part('x,y,z', ',', 2) | ++-----------------------------+ +| y | ++-----------------------------+ + +select split_part('x,y,z',',',3); ++-----------------------------+ +| split_part('x,y,z', ',', 3) | ++-----------------------------+ +| z | ++-----------------------------+ +</codeblock> + + <p> + These examples show what happens for out-of-range field positions. + Specifying a value less than 1 produces an error. Specifying a value + greater than the number of fields returns a zero-length string + (which is not the same as <codeph>NULL</codeph>). 
+ </p> +<codeblock><![CDATA[ +select split_part('x,y,z',',',0); +ERROR: Invalid field position: 0 + +with t1 as (select split_part('x,y,z',',',4) nonexistent_field) + select + nonexistent_field + , concat('[',nonexistent_field,']') + , length(nonexistent_field); +from t1 ++-------------------+-------------------------------------+---------------------------+ +| nonexistent_field | concat('[', nonexistent_field, ']') | length(nonexistent_field) | ++-------------------+-------------------------------------+---------------------------+ +| | [] | 0 | ++-------------------+-------------------------------------+---------------------------+ +</codeblock> + + <p> + These examples show how the delimiter can be a multi-character value: + </p> +<codeblock><![CDATA[ +select split_part('one***two***three','***',2); ++-------------------------------------------+ +| split_part('one***two***three', '***', 2) | ++-------------------------------------------+ +| two | ++-------------------------------------------+ + +select split_part('one\|/two\|/three','\|/',3); ++-------------------------------------------+ +| split_part('one\|/two\|/three', '\|/', 3) | ++-------------------------------------------+ +| three | ++-------------------------------------------+ +]]> +</codeblock> + </dd> + + </dlentry> + <dlentry id="strleft"> <dt> @@ -711,6 +916,8 @@ Returned 1 row(s) in 0.12s</codeblock> <p> <b>Return type:</b> <codeph>string</codeph> </p> + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + <p conref="../shared/impala_common.xml#common/case_insensitive_comparisons_tip"/> </dd> </dlentry> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_struct.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_struct.xml b/docs/topics/impala_struct.xml index 1e440fc..769ff49 100644 --- a/docs/topics/impala_struct.xml +++ b/docs/topics/impala_struct.xml @@ -1,110 +1,108 @@ -<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="struct"> - <concept id="struct"> + <title>STRUCT Complex Type (CDH 5.5 or higher only)</title> - <title>STRUCT Complex Type (CDH 5.5 or higher only)</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Data Types"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> - <prolog> - <metadata> - <data name="Category" value="Impala"/> - <data name="Category" value="Impala Data Types"/> - </metadata> - </prolog> + <conbody> - <conbody> + <p> + A complex data type, representing multiple fields of a single item. Frequently used as the element type of an <codeph>ARRAY</codeph> + or the <codeph>VALUE</codeph> part of a <codeph>MAP</codeph>. + </p> - <p> - A complex data type, representing multiple fields of a single item. - Frequently used as the element type of an <codeph>ARRAY</codeph> - or the <codeph>VALUE</codeph> part of a <codeph>MAP</codeph>. - </p> - - <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> <codeblock><varname>column_name</varname> STRUCT < <varname>name</varname> : <varname>type</varname> [COMMENT '<varname>comment_string</varname>'], ... 
> type ::= <varname>primitive_type</varname> | <varname>complex_type</varname> </codeblock> - <p> - The names and number of fields within the <codeph>STRUCT</codeph> are fixed. Each field can be a different type. - A field within a <codeph>STRUCT</codeph> can also be another <codeph>STRUCT</codeph>, or an <codeph>ARRAY</codeph> - or a <codeph>MAP</codeph>, allowing you to create nested data structures with a maximum nesting depth of 100. - </p> - - <p> - A <codeph>STRUCT</codeph> can be the top-level type for a column, or can itself be an item within an <codeph>ARRAY</codeph> - or the value part of the key-value pair in a <codeph>MAP</codeph>. - </p> - - <p> - When a <codeph>STRUCT</codeph> is used as an <codeph>ARRAY</codeph> element or a <codeph>MAP</codeph> value, - you use a join clause to bring the <codeph>ARRAY</codeph> or <codeph>MAP</codeph> elements into the result set, and then refer - to <codeph><varname>array_name</varname>.ITEM.<varname>field</varname></codeph> or - <codeph><varname>map_name</varname>.VALUE.<varname>field</varname></codeph>. - In the case of a <codeph>STRUCT</codeph> directly inside an <codeph>ARRAY</codeph> or <codeph>MAP</codeph>, - you can omit the <codeph>.ITEM</codeph> and <codeph>.VALUE</codeph> pseudocolumns and refer directly to - <codeph><varname>array_name</varname>.<varname>field</varname></codeph> or - <codeph><varname>map_name</varname>.<varname>field</varname></codeph>. - </p> - - <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> - - <p conref="../shared/impala_common.xml#common/complex_types_combo"/> - - <p> - A <codeph>STRUCT</codeph> is similar conceptually to a table row: it contains a fixed number of named fields, - each with a predefined type. To combine two related tables, while using complex types to - minimize repetition, the typical way to represent that data is as an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph> elements. - </p> - - <p> - Because a <codeph>STRUCT</codeph> has a fixed number of named fields, it typically does not make sense - to have a <codeph>STRUCT</codeph> as the type of a table column. In such a case, you could just make each field of the - <codeph>STRUCT</codeph> into a separate column of the table. The <codeph>STRUCT</codeph> type is most - useful as an item of an <codeph>ARRAY</codeph> or the value part of the key-value pair in a <codeph>MAP</codeph>. - A nested type column with a <codeph>STRUCT</codeph> at the lowest level lets you associate a variable - number of row-like objects with each row of the table. - </p> - - <p> - The <codeph>STRUCT</codeph> type is straightforward to reference within a query. You do not need to - include the <codeph>STRUCT</codeph> column in a join clause or give it a table alias, as is - required for the <codeph>ARRAY</codeph> and <codeph>MAP</codeph> types. You refer to the individual - fields using dot notation, such as <codeph><varname>struct_column_name</varname>.<varname>field_name</varname></codeph>, - without any pseudocolumn such as <codeph>ITEM</codeph> or <codeph>VALUE</codeph>. - </p> - - <p conref="../shared/impala_common.xml#common/complex_types_describe"/> - - <p conref="../shared/impala_common.xml#common/internals_blurb"/> - - <p> - Within the Parquet data file, the values for each <codeph>STRUCT</codeph> field are stored adjacent to each other, - so that they can be encoded and compressed using all the Parquet techniques for storing sets of similar or - repeated values. 
The adjacency applies even when the <codeph>STRUCT</codeph> values are part of an - <codeph>ARRAY</codeph> or <codeph>MAP</codeph>. During a query, Impala avoids unnecessary I/O by reading only the portions - of the Parquet data file containing the requested <codeph>STRUCT</codeph> fields. - </p> - - <p conref="../shared/impala_common.xml#common/added_in_230"/> - - <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> - - <ul conref="../shared/impala_common.xml#common/complex_types_restrictions"> - <li/> - </ul> - - <p conref="../shared/impala_common.xml#common/example_blurb"/> - - <note conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/> - - <p> - The following example shows a table with various kinds of <codeph>STRUCT</codeph> columns, - both at the top level and nested within other complex types. - Practice the <codeph>CREATE TABLE</codeph> and query notation for complex type columns - using empty tables, until you can visualize a complex data structure and construct corresponding SQL statements reliably. - </p> + <p> + The names and number of fields within the <codeph>STRUCT</codeph> are fixed. Each field can be a different type. A field within a + <codeph>STRUCT</codeph> can also be another <codeph>STRUCT</codeph>, or an <codeph>ARRAY</codeph> or a <codeph>MAP</codeph>, allowing + you to create nested data structures with a maximum nesting depth of 100. + </p> + + <p> + A <codeph>STRUCT</codeph> can be the top-level type for a column, or can itself be an item within an <codeph>ARRAY</codeph> or the + value part of the key-value pair in a <codeph>MAP</codeph>. + </p> + + <p> + When a <codeph>STRUCT</codeph> is used as an <codeph>ARRAY</codeph> element or a <codeph>MAP</codeph> value, you use a join clause to + bring the <codeph>ARRAY</codeph> or <codeph>MAP</codeph> elements into the result set, and then refer to + <codeph><varname>array_name</varname>.ITEM.<varname>field</varname></codeph> or + <codeph><varname>map_name</varname>.VALUE.<varname>field</varname></codeph>. In the case of a <codeph>STRUCT</codeph> directly inside + an <codeph>ARRAY</codeph> or <codeph>MAP</codeph>, you can omit the <codeph>.ITEM</codeph> and <codeph>.VALUE</codeph> pseudocolumns + and refer directly to <codeph><varname>array_name</varname>.<varname>field</varname></codeph> or + <codeph><varname>map_name</varname>.<varname>field</varname></codeph>. + </p> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p conref="../shared/impala_common.xml#common/complex_types_combo"/> + + <p> + A <codeph>STRUCT</codeph> is similar conceptually to a table row: it contains a fixed number of named fields, each with a predefined + type. To combine two related tables, while using complex types to minimize repetition, the typical way to represent that data is as an + <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph> elements. + </p> + + <p> + Because a <codeph>STRUCT</codeph> has a fixed number of named fields, it typically does not make sense to have a + <codeph>STRUCT</codeph> as the type of a table column. In such a case, you could just make each field of the <codeph>STRUCT</codeph> + into a separate column of the table. The <codeph>STRUCT</codeph> type is most useful as an item of an <codeph>ARRAY</codeph> or the + value part of the key-value pair in a <codeph>MAP</codeph>. A nested type column with a <codeph>STRUCT</codeph> at the lowest level + lets you associate a variable number of row-like objects with each row of the table. 
+ </p> + + <p> + The <codeph>STRUCT</codeph> type is straightforward to reference within a query. You do not need to include the + <codeph>STRUCT</codeph> column in a join clause or give it a table alias, as is required for the <codeph>ARRAY</codeph> and + <codeph>MAP</codeph> types. You refer to the individual fields using dot notation, such as + <codeph><varname>struct_column_name</varname>.<varname>field_name</varname></codeph>, without any pseudocolumn such as + <codeph>ITEM</codeph> or <codeph>VALUE</codeph>. + </p> + + <p conref="../shared/impala_common.xml#common/complex_types_describe"/> + + <p conref="../shared/impala_common.xml#common/internals_blurb"/> + + <p> + Within the Parquet data file, the values for each <codeph>STRUCT</codeph> field are stored adjacent to each other, so that they can be + encoded and compressed using all the Parquet techniques for storing sets of similar or repeated values. The adjacency applies even + when the <codeph>STRUCT</codeph> values are part of an <codeph>ARRAY</codeph> or <codeph>MAP</codeph>. During a query, Impala avoids + unnecessary I/O by reading only the portions of the Parquet data file containing the requested <codeph>STRUCT</codeph> fields. + </p> + + <p conref="../shared/impala_common.xml#common/added_in_230"/> + + <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> + + <ul conref="../shared/impala_common.xml#common/complex_types_restrictions"> + <li/> + </ul> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <note conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/> + + <p> + The following example shows a table with various kinds of <codeph>STRUCT</codeph> columns, both at the top level and nested within + other complex types. Practice the <codeph>CREATE TABLE</codeph> and query notation for complex type columns using empty tables, until + you can visualize a complex data structure and construct corresponding SQL statements reliably. + </p> <codeblock><![CDATA[CREATE TABLE struct_demo ( @@ -128,15 +126,13 @@ STORED AS PARQUET; ]]> </codeblock> - <p> - The following example shows how to examine the structure of a table containing one or more - <codeph>STRUCT</codeph> columns by using the <codeph>DESCRIBE</codeph> statement. You can - visualize each <codeph>STRUCT</codeph> as its own table, with columns - named the same as each field of the <codeph>STRUCT</codeph>. - If the <codeph>STRUCT</codeph> is nested inside another complex type, such as <codeph>ARRAY</codeph>, - you can extend the qualified name passed to <codeph>DESCRIBE</codeph> until the output - shows just the <codeph>STRUCT</codeph> fields. - </p> + <p> + The following example shows how to examine the structure of a table containing one or more <codeph>STRUCT</codeph> columns by using + the <codeph>DESCRIBE</codeph> statement. You can visualize each <codeph>STRUCT</codeph> as its own table, with columns named the same + as each field of the <codeph>STRUCT</codeph>. If the <codeph>STRUCT</codeph> is nested inside another complex type, such as + <codeph>ARRAY</codeph>, you can extend the qualified name passed to <codeph>DESCRIBE</codeph> until the output shows just the + <codeph>STRUCT</codeph> fields. 
+ </p> <codeblock><![CDATA[DESCRIBE struct_demo; +-------------------+--------------------------+ @@ -169,8 +165,16 @@ STORED AS PARQUET; | | postal_code:string | | | > | +-------------------+--------------------------+ +]]> +</codeblock> + + <p> + The top-level column <codeph>EMPLOYEE_INFO</codeph> is a <codeph>STRUCT</codeph>. Describing + <codeph><varname>table_name</varname>.<varname>struct_name</varname></codeph> displays the fields of the <codeph>STRUCT</codeph> as if + they were columns of a table: + </p> -DESCRIBE struct_demo.employee_info; +<codeblock><![CDATA[DESCRIBE struct_demo.employee_info; +----------+--------+ | name | type | +----------+--------+ @@ -178,10 +182,15 @@ DESCRIBE struct_demo.employee_info; | id | bigint | | address | string | +----------+--------+ +]]> +</codeblock> + + <p> + Because <codeph>PLACES_LIVED</codeph> is a <codeph>STRUCT</codeph> inside an <codeph>ARRAY</codeph>, the initial + <codeph>DESCRIBE</codeph> shows the structure of the <codeph>ARRAY</codeph>: + </p> --- Because PLACES_LIVED is a STRUCT inside an ARRAY, the --- initial DESCRIBE shows the structure of the ARRAY. -DESCRIBE struct_demo.places_lived; +<codeblock><![CDATA[DESCRIBE struct_demo.places_lived; +------+------------------+ | name | type | +------+------------------+ @@ -192,10 +201,15 @@ DESCRIBE struct_demo.places_lived; | | > | | pos | bigint | +------+------------------+ +]]> +</codeblock> --- Ask for the details of the ITEM field of the ARRAY to see --- just the layout of the STRUCT. -DESCRIBE struct_demo.places_lived.item; + <p> + Ask for the details of the <codeph>ITEM</codeph> field of the <codeph>ARRAY</codeph> to see just the layout of the + <codeph>STRUCT</codeph>: + </p> + +<codeblock><![CDATA[DESCRIBE struct_demo.places_lived.item; +---------+--------+ | name | type | +---------+--------+ @@ -203,11 +217,15 @@ DESCRIBE struct_demo.places_lived.item; | city | string | | country | string | +---------+--------+ +]]> +</codeblock> + + <p> + Likewise, <codeph>MEMORABLE_MOMENTS</codeph> has a <codeph>STRUCT</codeph> inside a <codeph>MAP</codeph>, which requires an extra + level of qualified name to see just the <codeph>STRUCT</codeph> part: + </p> --- Likewise, MEMORABLE_MOMENTS has a STRUCT inside a MAP, --- which requires an extra level of qualified name to see --- just the STRUCT part. -DESCRIBE struct_demo.memorable_moments; +<codeblock><![CDATA[DESCRIBE struct_demo.memorable_moments; +-------+------------------+ | name | type | +-------+------------------+ @@ -218,10 +236,15 @@ DESCRIBE struct_demo.memorable_moments; | | details:string | | | > | +-------+------------------+ +]]> +</codeblock> + + <p> + For a <codeph>MAP</codeph>, ask to see the <codeph>VALUE</codeph> field to see the corresponding <codeph>STRUCT</codeph> fields in a + table-like structure: + </p> --- For a MAP, ask to see the VALUE field to see the --- corresponding STRUCT fields in a table-like structure. -DESCRIBE struct_demo.memorable_moments.value; +<codeblock><![CDATA[DESCRIBE struct_demo.memorable_moments.value; +---------+--------+ | name | type | +---------+--------+ @@ -229,10 +252,14 @@ DESCRIBE struct_demo.memorable_moments.value; | place | string | | details | string | +---------+--------+ +]]> +</codeblock> --- For a STRUCT inside a STRUCT, we can see the fields of the --- outer STRUCT... 
-DESCRIBE struct_demo.current_address; + <p> + For a <codeph>STRUCT</codeph> inside a <codeph>STRUCT</codeph>, we can see the fields of the outer <codeph>STRUCT</codeph>: + </p> + +<codeblock><![CDATA[DESCRIBE struct_demo.current_address; +----------------+-----------------------+ | name | type | +----------------+-----------------------+ @@ -244,10 +271,14 @@ DESCRIBE struct_demo.current_address; | country | string | | postal_code | string | +----------------+-----------------------+ +]]> +</codeblock> --- ...and then use a further qualified name to see just the --- fields of the inner STRUCT. -DESCRIBE struct_demo.current_address.street_address; + <p> + Then we can use a further qualified name to see just the fields of the inner <codeph>STRUCT</codeph>: + </p> + +<codeblock><![CDATA[DESCRIBE struct_demo.current_address.street_address; +---------------+--------+ | name | type | +---------------+--------+ @@ -258,17 +289,16 @@ DESCRIBE struct_demo.current_address.street_address; ]]> </codeblock> - <p> - The following example shows how to examine the structure of a table containing one or more - <codeph>STRUCT</codeph> columns by using the <codeph>DESCRIBE</codeph> statement. You can - visualize each <codeph>STRUCT</codeph> as its own table, with columns - named the same as each field of the <codeph>STRUCT</codeph>. - If the <codeph>STRUCT</codeph> is nested inside another complex type, such as <codeph>ARRAY</codeph>, - you can extend the qualified name passed to <codeph>DESCRIBE</codeph> until the output - shows just the <codeph>STRUCT</codeph> fields. - </p> + <p> + The following example shows how to examine the structure of a table containing one or more <codeph>STRUCT</codeph> columns by using + the <codeph>DESCRIBE</codeph> statement. You can visualize each <codeph>STRUCT</codeph> as its own table, with columns named the same + as each field of the <codeph>STRUCT</codeph>. If the <codeph>STRUCT</codeph> is nested inside another complex type, such as + <codeph>ARRAY</codeph>, you can extend the qualified name passed to <codeph>DESCRIBE</codeph> until the output shows just the + <codeph>STRUCT</codeph> fields. + </p> <!-- To do: See why the most verbose query form gives an error. 
--> + <codeblock><![CDATA[DESCRIBE struct_demo; +-------------------+--------------------------+---------+ | name | type | comment | @@ -311,9 +341,8 @@ SELECT id, employee_info.id AS employee_id, employee_info.employer SELECT id, name, street, city, country FROM struct_demo, struct_demo.places_lived; -SELECT id, name, struct_demo.places_lived.pos, struct_demo.places_lived.street, struct_demo.places_lived.city, struct_demo.places_lived.country +SELECT id, name, places_lived.pos, places_lived.street, places_lived.city, places_lived.country FROM struct_demo, struct_demo.places_lived; -ERROR: AnalysisException: Illegal column/field reference 'struct_demo.places_lived.pos' with intermediate collection 'places_lived' of type 'ARRAY<STRUCT<street:STRING,city:STRING,country:STRING>>' SELECT id, name, pl.pos, pl.street, pl.city, pl.country FROM struct_demo, struct_demo.places_lived AS pl; @@ -324,13 +353,12 @@ SELECT id, name, places_lived.pos, places_lived.street, places_lived.city, place SELECT id, name, pos, street, city, country FROM struct_demo, struct_demo.places_lived; -SELECT id, name, struct_demo.memorable_moments.key, - struct_demo.memorable_moments.value.year, - struct_demo.memorable_moments.value.place, - struct_demo.memorable_moments.value.details +SELECT id, name, memorable_moments.key, + memorable_moments.value.year, + memorable_moments.value.place, + memorable_moments.value.details FROM struct_demo, struct_demo.memorable_moments -WHERE struct_demo.memorable_moments.key IN ('Birthday','Anniversary','Graduation'); -ERROR: AnalysisException: Illegal column/field reference 'struct_demo.memorable_moments.key' with intermediate collection 'memorable_moments' of type 'MAP<STRING,STRUCT<year:INT,place:STRING,details:STRING>>' +WHERE memorable_moments.key IN ('Birthday','Anniversary','Graduation'); SELECT id, name, mm.key, mm.value.year, mm.value.place, mm.value.details FROM struct_demo, struct_demo.memorable_moments AS mm @@ -359,48 +387,47 @@ FROM struct_demo; ]]> </codeblock> - <p> - For example, this table uses a struct that encodes several data values for each phone number associated - with a person. Each person can have a variable-length array of associated phone numbers, and queries can - refer to the category field to locate specific home, work, mobile, and so on kinds of phone numbers. - </p> + <p> + For example, this table uses a struct that encodes several data values for each phone number associated with a person. Each person can + have a variable-length array of associated phone numbers, and queries can refer to the category field to locate specific home, work, + mobile, and so on kinds of phone numbers. 
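A hedged sketch of such a category-based lookup, written against the contact_info_many_structs table that is defined in the codeblock just below; the 'work' literal and the selected fields are illustrative:

    -- Join-style reference to the phone_numbers ARRAY, then filter on the STRUCT's category field
    SELECT id, name, p.full_number, p.carrier
    FROM contact_info_many_structs, contact_info_many_structs.phone_numbers AS p
    WHERE p.category = 'work';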
+ </p> -<codeblock>CREATE TABLE contact_info_many_structs +<codeblock><![CDATA[CREATE TABLE contact_info_many_structs ( id BIGINT, name STRING, - phone_numbers ARRAY < STRUCT <category:STRING, country_code:STRING, area_code:SMALLINT, full_number:STRING, mobile:BOOLEAN, carrier:STRING > > + phone_numbers ARRAY < STRUCT <category:STRING, country_code:STRING, area_code:SMALLINT, full_number:STRING, mobile:BOOLEAN, carrier:STRING > > ) STORED AS PARQUET; +]]> </codeblock> - <p> - Because structs are naturally suited to composite values where the fields have different data types, you might use them - to decompose things such as addresses: - </p> + <p> + Because structs are naturally suited to composite values where the fields have different data types, you might use them to decompose + things such as addresses: + </p> -<codeblock>CREATE TABLE contact_info_detailed_address +<codeblock><![CDATA[CREATE TABLE contact_info_detailed_address ( id BIGINT, name STRING, - address STRUCT < house_number:INT, street:STRING, street_type:STRING, apartment:STRING, city:STRING, region:STRING, country:STRING > + address STRUCT < house_number:INT, street:STRING, street_type:STRING, apartment:STRING, city:STRING, region:STRING, country:STRING > ); +]]> </codeblock> - <p> - In a big data context, splitting out data fields such as the number part of the address and the street name - could let you do analysis on each field independently. For example, which streets have the largest number - range of addresses, what are the statistical properties of the street names, which areas have a higher - proportion of <q>Roads</q>, <q>Courts</q> or <q>Boulevards</q>, and so on. - </p> - - <p conref="../shared/impala_common.xml#common/related_info"/> + <p> + In a big data context, splitting out data fields such as the number part of the address and the street name could let you do analysis + on each field independently. For example, which streets have the largest number range of addresses, what are the statistical + properties of the street names, which areas have a higher proportion of <q>Roads</q>, <q>Courts</q> or <q>Boulevards</q>, and so on. 
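A hedged sketch of the kind of per-field analysis mentioned above, assuming the contact_info_detailed_address table from the preceding codeblock has been populated; the street_type values it would surface (such as 'Road' or 'Boulevard') depend entirely on the data loaded:

    -- Count addresses per street type using dot notation on the address STRUCT
    SELECT address.street_type, COUNT(*) AS num_addresses
    FROM contact_info_detailed_address
    GROUP BY address.street_type
    ORDER BY num_addresses DESC;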
+ </p> - <p> - <xref href="impala_complex_types.xml#complex_types"/>, - <xref href="impala_array.xml#array"/>, - <!-- <xref href="impala_struct.xml#struct"/>, --> - <xref href="impala_map.xml#map"/> - </p> + <p conref="../shared/impala_common.xml#common/related_info"/> - </conbody> + <p> + <xref href="impala_complex_types.xml#complex_types"/>, <xref href="impala_array.xml#array"/>, +<!-- <xref href="impala_struct.xml#struct"/>, --> + <xref href="impala_map.xml#map"/> + </p> - </concept> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_subqueries.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_subqueries.xml b/docs/topics/impala_subqueries.xml index ed99f3a..d1c4d79 100644 --- a/docs/topics/impala_subqueries.xml +++ b/docs/topics/impala_subqueries.xml @@ -3,18 +3,20 @@ <concept rev="2.0.0" id="subqueries"> <title>Subqueries in Impala SELECT Statements</title> - <titlealts><navtitle>Subqueries</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>Subqueries</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> <data name="Category" value="SQL"/> <data name="Category" value="Querying"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> </metadata> </prolog> <conbody> - <p> + <p rev="2.0.0"> <indexterm audience="Cloudera">subqueries</indexterm> A <term>subquery</term> is a query that is nested within another query. Subqueries let queries on one table dynamically adapt based on the contents of another table. This technique provides great flexibility and @@ -153,10 +155,10 @@ SELECT x FROM t1 WHERE x IN (SELECT y FROM t2 WHERE state = 'CA'); SELECT x FROM t1 WHERE y > (SELECT count(z) FROM t2); </codeblock> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/partitioning_blurb"/> --> +<!-- <p conref="../shared/impala_common.xml#common/partitioning_blurb"/> --> <!-- -<p conref="/Content/impala_common_xi44078.xml#common/hbase_blurb"/> +<p conref="../shared/impala_common.xml#common/hbase_blurb"/> <p> Currently, the <codeph>IN (<varname>subquery</varname>)</codeph> operator results in a full table scan of an HBase table, rather than being translated into a series of single-row lookups. 
@@ -165,9 +167,9 @@ Therefore, this is not an efficient construct to use with Impala queries for HBa --> <!-- -<p conref="/Content/impala_common_xi44078.xml#common/parquet_blurb"/> -<p conref="/Content/impala_common_xi44078.xml#common/text_blurb"/> -<p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> +<p conref="../shared/impala_common.xml#common/parquet_blurb"/> +<p conref="../shared/impala_common.xml#common/text_blurb"/> +<p conref="../shared/impala_common.xml#common/compatibility_blurb"/> --> <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_sum.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_sum.xml b/docs/topics/impala_sum.xml index 6d25f1c..cb9c453 100644 --- a/docs/topics/impala_sum.xml +++ b/docs/topics/impala_sum.xml @@ -2,7 +2,7 @@ <concept id="sum"> <title>SUM Function</title> - <titlealts><navtitle>SUM</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>SUM</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> @@ -11,6 +11,8 @@ <data name="Category" value="Analytic Functions"/> <data name="Category" value="Aggregate Functions"/> <data name="Category" value="Querying"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> </metadata> </prolog> @@ -41,7 +43,7 @@ <p conref="../shared/impala_common.xml#common/complex_types_blurb"/> <p conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/> - + <p conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/> <p conref="../shared/impala_common.xml#common/example_blurb"/> @@ -162,7 +164,7 @@ select x, property, Changing the direction of the <codeph>ORDER BY</codeph> clause causes the intermediate results of the cumulative total to be calculated in a different order: -<codeblock>select sum(x) over (partition by property <b>order by x desc</b>) as 'cumulative total' +<codeblock>select sum(x) over (partition by property <b>order by x desc</b>) as 'cumulative total' from int_t where property in ('odd','even'); +----+----------+------------------+ | x | property | cumulative total | @@ -221,16 +223,17 @@ ERROR: AnalysisException: RANGE is only supported with both the lower and upper </codeblock> </p> + <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> + +<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. --> + + <p conref="../shared/impala_common.xml#common/sum_double"/> + <p conref="../shared/impala_common.xml#common/related_info"/> <p> <xref href="impala_analytic_functions.xml#analytic_functions"/> </p> - <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> - -<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. 
--> - - <p conref="../shared/impala_common.xml#common/sum_double"/> </conbody> </concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_support_start_over.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_support_start_over.xml b/docs/topics/impala_support_start_over.xml index 2c17b5d..906efaa 100644 --- a/docs/topics/impala_support_start_over.xml +++ b/docs/topics/impala_support_start_over.xml @@ -3,6 +3,7 @@ <concept id="support_start_over"> <title>SUPPORT_START_OVER Query Option</title> + <titlealts audience="PDF"><navtitle>SUPPORT_START_OVER</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_sync_ddl.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_sync_ddl.xml b/docs/topics/impala_sync_ddl.xml index b217f67..7453e94 100644 --- a/docs/topics/impala_sync_ddl.xml +++ b/docs/topics/impala_sync_ddl.xml @@ -3,12 +3,15 @@ <concept rev="1.2.1" id="sync_ddl"> <title>SYNC_DDL Query Option</title> + <titlealts audience="PDF"><navtitle>SYNC_DDL</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> <data name="Category" value="Impala Query Options"/> <data name="Category" value="DDL"/> <data name="Category" value="SQL"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> </metadata> </prolog> @@ -43,9 +46,9 @@ <p conref="../shared/impala_common.xml#common/type_boolean"/> <p conref="../shared/impala_common.xml#common/default_false_0"/> - <draft-comment translate="no"> -Example could be useful here. -</draft-comment> + <!-- To do: + Example could be useful here. + --> <p conref="../shared/impala_common.xml#common/related_info"/> <p> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_tables.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_tables.xml b/docs/topics/impala_tables.xml index 30e3737..dfd3199 100644 --- a/docs/topics/impala_tables.xml +++ b/docs/topics/impala_tables.xml @@ -3,7 +3,7 @@ <concept id="tables"> <title>Overview of Impala Tables</title> - <titlealts><navtitle>Tables</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>Tables</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_timeouts.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_timeouts.xml b/docs/topics/impala_timeouts.xml index fe537a6..03ad773 100644 --- a/docs/topics/impala_timeouts.xml +++ b/docs/topics/impala_timeouts.xml @@ -3,19 +3,170 @@ <concept rev="1.2.1" id="timeouts"> <title>Setting Timeout Periods for Daemons, Queries, and Sessions</title> - <titlealts audience="PDF"><navtitle>Setting Timeouts</navtitle></titlealts> - + + <titlealts audience="PDF"> + + <navtitle>Setting Timeouts</navtitle> + + </titlealts> + + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Administrators"/> + <data name="Category" value="Scheduling"/> + <data name="Category" value="Scalability"/> + </metadata> + </prolog> <conbody> <p> - Depending on how busy your CDH cluster is, you might increase or decrease various timeout values. 
- Increase timeouts if Impala is cancelling operations prematurely, when the system is - responding slower than usual but the operations are still successful if given extra time. - Decrease timeouts if operations are idle or hanging for long periods, and the - idle or hung operations are consuming resources and reducing concurrency. + Depending on how busy your CDH cluster is, you might increase or decrease various timeout + values. Increase timeouts if Impala is cancelling operations prematurely, when the system + is responding slower than usual but the operations are still successful if given extra + time. Decrease timeouts if operations are idle or hanging for long periods, and the idle + or hung operations are consuming resources and reducing concurrency. </p> - + <p outputclass="toc inpage"/> + </conbody> + + <concept id="statestore_timeout"> + + <title>Increasing the Statestore Timeout</title> + + <conbody> + + <p rev="IMP-1210"> + If you have an extensive Impala schema, for example with hundreds of databases, tens of + thousands of tables, and so on, you might encounter timeout errors during startup as the + Impala catalog service broadcasts metadata to all the Impala nodes using the statestore + service. To avoid such timeout errors on startup, increase the statestore timeout value + from its default of 10 seconds. Specify the timeout value using the + <codeph>-statestore_subscriber_timeout_seconds</codeph> option for the statestore + service, using the configuration instructions in + <xref href="impala_config_options.xml#config_options"/>. The symptom of this problem is + messages in the <codeph>impalad</codeph> log such as: + </p> + +<codeblock>Connection with state-store lost +Trying to re-register with state-store</codeblock> + + <p> + See <xref href="impala_scalability.xml#statestore_scalability"/> for more details about + statestore operation and settings on clusters with a large number of Impala-related + objects such as tables and partitions. + </p> + + </conbody> + + </concept> + + <concept id="impalad_timeout"> + + <title>Setting the Idle Query and Idle Session Timeouts for impalad</title> + + <conbody> + + <p> + To keep long-running queries or idle sessions from tying up cluster resources, you can + set timeout intervals for both individual queries, and entire sessions. + </p> + + <note conref="../shared/impala_common.xml#common/timeout_clock_blurb"/> + + <p> + Specify the following startup options for the <cmdname>impalad</cmdname> daemon: + </p> + + <ul> + <li> + The <codeph>--idle_query_timeout</codeph> option specifies the time in seconds after + which an idle query is cancelled. This could be a query whose results were all fetched + but was never closed, or one whose results were partially fetched and then the client + program stopped requesting further results. This condition is most likely to occur in + a client program using the JDBC or ODBC interfaces, rather than in the interactive + <cmdname>impala-shell</cmdname> interpreter. Once the query is cancelled, the client + program cannot retrieve any further results. + </li> + + <li> + The <codeph>--idle_session_timeout</codeph> option specifies the time in seconds after + which an idle session is expired. A session is idle when no activity is occurring for + any of the queries in that session, and the session has not started any new queries. + Once a session is expired, you cannot issue any new query requests to it. The session + remains open, but the only operation you can perform is to close it. 
The default value + of 0 means that sessions never expire. + </li> + </ul> + + <p> + For instructions on changing <cmdname>impalad</cmdname> startup options, see + <xref href="impala_config_options.xml#config_options"/>. + </p> + + <p rev="2.0.0"> + You can reduce the idle query timeout by using the <codeph>QUERY_TIMEOUT_S</codeph> + query option. Any value specified for the <codeph>--idle_query_timeout</codeph> startup + option serves as an upper limit for the <codeph>QUERY_TIMEOUT_S</codeph> query option. + See <xref href="impala_query_timeout_s.xml#query_timeout_s"/> for details. + </p> + + </conbody> + + </concept> + <concept id="concept_rfy_jl1_rx"> + <title>Setting Timeout and Retries for Thrift Connections to the Backend + Client</title> + <conbody> + <p>Impala connections to the backend client are subject to failure in + cases when the network is momentarily overloaded. To avoid failed + queries due to transient network problems, you can configure the number + of Thrift connection retries using the following option: </p> + <ul id="ul_bj3_ql1_rx"> + <li>The <codeph>--backend_client_connection_num_retries</codeph> option + specifies the number of times Impala will try connecting to the + backend client after the first connection attempt fails. By default, + <cmdname>impalad</cmdname> will attempt three re-connections before + it returns a failure. </li> + </ul> + <p>You can configure timeouts for sending and receiving data from the + backend client. Therefore, if for some reason a query hangs, instead of + waiting indefinitely for a response, Impala will terminate the + connection after a configurable timeout.</p> + <ul id="ul_vm2_2v1_rx"> + <li>The <codeph>--backend_client_rpc_timeout_ms</codeph> option can be + used to specify the number of milliseconds Impala should wait for a + response from the backend client before it terminates the connection + and signals a failure. The default value for this property is 300000 + milliseconds, or 5 minutes. </li> + </ul> + </conbody> + </concept> + + <concept id="cancel_query"> + + <title>Cancelling a Query</title> + + <conbody> + + <p> + Sometimes, an Impala query might run for an unexpectedly long time, tying up resources + in the cluster. You can cancel the query explicitly, independent of the timeout period, + by going into the web UI for the <cmdname>impalad</cmdname> host (on port 25000 by + default), and using the link on the <codeph>/queries</codeph> tab to cancel the running + query. Various client applications let you interactively cancel queries submitted or + monitored through those applications. For example, by pressing <codeph>^C</codeph> in + <cmdname>impala-shell</cmdname>, clicking the <uicontrol>Cancel</uicontrol> button from + the <uicontrol>Watch</uicontrol> page in Hue, clicking <uicontrol>Actions > + Cancel</uicontrol> from the <uicontrol>Queries</uicontrol> list in Cloudera Manager, and + so on. 
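A hedged sketch of lowering the idle query timeout for a single session through the QUERY_TIMEOUT_S query option described above; the numeric values and the table name are illustrative, and any --idle_query_timeout startup setting still acts as the upper limit:

    -- In an impala-shell session (assumes the daemon allows at least 60 seconds)
    SET QUERY_TIMEOUT_S=60;
    SELECT count(*) FROM struct_demo;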
+ </p> + + </conbody> + + </concept> + </concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_timestamp.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_timestamp.xml b/docs/topics/impala_timestamp.xml index c469b54..4b6e16e 100644 --- a/docs/topics/impala_timestamp.xml +++ b/docs/topics/impala_timestamp.xml @@ -3,7 +3,7 @@ <concept id="timestamp"> <title>TIMESTAMP Data Type</title> - <titlealts><navtitle>TIMESTAMP</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>TIMESTAMP</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> @@ -396,7 +396,7 @@ insert into dates_and_times values <p conref="../shared/impala_common.xml#common/text_bulky"/> -<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> --> +<!-- <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> --> <p conref="../shared/impala_common.xml#common/internals_16_bytes"/> @@ -404,6 +404,10 @@ insert into dates_and_times values <p conref="../shared/impala_common.xml#common/column_stats_constant"/> + <p conref="../shared/impala_common.xml#common/sqoop_blurb"/> + + <p conref="../shared/impala_common.xml#common/sqoop_timestamp_caveat"/> + <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> <p>