[05/22] incubator-impala git commit: First try at porting over the source files necessary for the Impala SQL Reference.

jrussell Tue, 26 Jul 2016 16:06:13 -0700

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_show.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_show.xml b/docs/topics/impala_show.xml
new file mode 100644
index 0000000..1e8c17d
--- /dev/null
+++ b/docs/topics/impala_show.xml
@@ -0,0 +1,1263 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD 
DITA Concept//EN" "concept.dtd">
+<concept id="show">
+
+  <title>SHOW Statement</title>
+  <titlealts><navtitle>SHOW</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Reports"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">SHOW statement</indexterm>
+      The <codeph>SHOW</codeph> statement is a flexible way to get information 
about different types of Impala
+      objects.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SHOW DATABASES [[LIKE] '<varname>pattern</varname>']
+SHOW SCHEMAS [[LIKE] '<varname>pattern</varname>'] - an alias for SHOW 
DATABASES
+SHOW TABLES [IN <varname>database_name</varname>] [[LIKE] 
'<varname>pattern</varname>']
+<ph rev="1.2.0">SHOW [AGGREGATE | ANALYTIC] FUNCTIONS [IN 
<varname>database_name</varname>] [[LIKE] '<varname>pattern</varname>']</ph>
+<ph rev="1.2.1">SHOW CREATE TABLE 
[<varname>database_name</varname>].<varname>table_name</varname></ph>
+<ph rev="1.2.1">SHOW TABLE STATS 
[<varname>database_name</varname>.]<varname>table_name</varname></ph>
+<ph rev="1.2.1">SHOW COLUMN STATS 
[<varname>database_name</varname>.]<varname>table_name</varname></ph>
+<ph rev="1.4.0">SHOW PARTITIONS 
[<varname>database_name</varname>.]<varname>table_name</varname></ph>
+<ph rev="2.2.0">SHOW FILES IN 
[<varname>database_name</varname>.]<varname>table_name</varname> [PARTITION 
(<varname>key_col</varname>=<varname>value</varname> [, 
<varname>key_col</varname>=<varname>value</varname>]]</ph>
+
+<ph rev="2.0.0">SHOW ROLES
+SHOW CURRENT ROLES
+SHOW ROLE GRANT GROUP <varname>group_name</varname>
+SHOW GRANT ROLE <varname>role_name</varname></ph>
+</codeblock>
+
+<!-- SHOW ROLE GRANT { USER <varname>user_name</varname> | GROUP 
<varname>group_name</varname> | ROLE <varname>role_name</varname> } -->
+
+<!-- Extracted from the previous codeblock because even hidden content 
produces blank lines.
+<ph audience="Cloudera" rev="1.4.0">SHOW DATA SOURCES [LIKE 
'<varname>source_name</varname>]</ph>
+-->
+
+<!-- Some suggestion there would be this syntax for 1.4, but it's not in the 
builds:
+<ph rev="1.4.0">SHOW [CACHED] TABLES [IN <varname>database_name</varname>] 
[[LIKE] '<varname>pattern</varname>']</ph>
+<ph rev="1.4.0">SHOW [CACHED] PARTITIONS 
[<varname>database_name</varname>.]<varname>table_name</varname></ph>
+-->
+
+    <p>
+      Issue a <codeph>SHOW <varname>object_type</varname></codeph> statement 
to see the appropriate objects in the
+      current database, or <codeph>SHOW <varname>object_type</varname> IN 
<varname>database_name</varname></codeph>
+      to see objects in a specific database.
+    </p>
+
+    <p>
+      The optional <varname>pattern</varname> argument is a quoted string 
literal, using Unix-style
+      <codeph>*</codeph> wildcards and allowing <codeph>|</codeph> for 
alternation. The preceding
+      <codeph>LIKE</codeph> keyword is also optional. All object names are 
stored in lowercase, so use all
+      lowercase letters in the pattern string. For example:
+    </p>
+
+<codeblock>show databases 'a*';
+show databases like 'a*';
+show tables in some_db like '*fact*';
+use some_db;
+show tables '*dim*|*fact*';</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+    <p outputclass="toc inpage"/>
+
+  </conbody>
+
+  <concept rev="2.2.0" id="show_files">
+
+    <title>SHOW FILES Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Disk Storage"/>
+      <data name="Category" value="Tables"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        The <codeph>SHOW FILES</codeph> statement displays the files that 
constitute a specified table,
+        or a partition within a partitioned table. This syntax is available in 
CDH 5.4 and higher
+        only. The output includes the names of the files, the size of each 
file, and the applicable partition
+        for a partitioned table. The size includes a suffix of 
<codeph>B</codeph> for bytes,
+        <codeph>MB</codeph> for megabytes, and <codeph>GB</codeph> for 
gigabytes.
+      </p>
+
+      <note>
+        This statement applies to tables and partitions stored on HDFS, or in 
the Amazon Simple Storage System (S3).
+        It does not apply to views.
+        It does not apply to tables mapped onto HBase, because HBase does not 
use the same file-based storage layout.
+      </note>
+
+      <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+      <p>
+        You can use this statement to verify the results of your ETL process: 
that is, that
+        the expected files are present, with the expected sizes. You can 
examine the file information
+        to detect conditions such as empty files, missing files, or 
inefficient layouts due to
+        a large number of small files. When you use <codeph>INSERT</codeph> 
statements to copy
+        from one table to another, you can see how the file layout changes due 
to file format
+        conversions, compaction of small input files into large data blocks, 
and
+        multiple output files from parallel queries and partitioned inserts.
+      </p>
+
+      <p>
+        The output from this statement does not include files that Impala 
considers to be hidden
+        or invisible, such as those whose names start with a dot or an 
underscore, or that
+        end with the suffixes <codeph>.copying</codeph> or 
<codeph>.tmp</codeph>.
+      </p>
+
+      <p>
+        The information for partitioned tables complements the output of the 
<codeph>SHOW PARTITIONS</codeph>
+        statement, which summarizes information about each partition. 
<codeph>SHOW PARTITIONS</codeph>
+        produces some output for each partition, while <codeph>SHOW 
FILES</codeph> does not
+        produce any output for empty partitions because they do not include 
any data files.
+      </p>
+
+<!-- Extensive round of testing makes me pretty confident of these findings. 
-->
+      <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+      <p rev="CDH-19187">
+        The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+        typically the <codeph>impala</codeph> user, must have read 
+        permission for all the table files, read and execute permission for 
all the directories that make up the table,
+        and execute permission for the database directory and all its parent 
directories.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+      <p>
+        The following example constructs <codeph>SHOW FILES</codeph> statements
+        for an unpartitioned tables using text format:
+      </p>
+
+<codeblock>[localhost:21000] > create table unpartitioned_text (x bigint, s 
string);
+[localhost:21000] > insert into unpartitioned_text (x, s) select id, name from 
oreilly.sample_data limit 20e6;
+[localhost:21000] > show files in unpartitioned_text;
++-------------------------------------------------------------------------------------+----------+-----------+
+| path                                                                         
       | size     | partition |
++-------------------------------------------------------------------------------------+----------+-----------+
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/35665776ef85cfaf_1012432410_data.0.
 | 448.31MB |           |
++-------------------------------------------------------------------------------------+----------+-----------+
+[localhost:21000] > insert into unpartitioned_text (x, s) select id, name from 
oreilly.sample_data limit 100e6;
+[localhost:21000] > show files in unpartitioned_text;
++---------------------------------------------------------------------------------------------+----------+-----------+
+| path                                                                         
               | size     | partition |
++---------------------------------------------------------------------------------------------+----------+-----------+
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/35665776ef85cfaf_1012432410_data.0.
 | 448.31MB |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/ac3dba252a8952b8_1663177415_data.0.
 | 2.19GB   |           |
++---------------------------------------------------------------------------------------------+----------+-----------+
+</codeblock>
+
+      <p>
+        This example illustrates how, after issuing some <codeph>INSERT ... 
VALUES</codeph> statements,
+        the table now contains some tiny files of just a few bytes. Such small 
files could cause inefficient processing of
+        parallel queries that are expecting multi-megabyte input files. The 
example shows how you might compact the small files by doing
+        an <codeph>INSERT ... SELECT</codeph> into a different table, possibly 
converting the data to Parquet in the process:
+      </p>
+
+<codeblock>[localhost:21000] > insert into unpartitioned_text values 
(10,'hello'), (20, 'world');
+[localhost:21000] > insert into unpartitioned_text values (-1,'foo'), (-1000, 
'bar');
+[localhost:21000] > show files in unpartitioned_text;
++---------------------------------------------------------------------------------------------+----------+-----------+
+| path                                                                         
               | size     | partition |
++---------------------------------------------------------------------------------------------+----------+-----------+
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/4f11b8bdf8b6aa92_238145083_data.0.
  | 18B      |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/35665776ef85cfaf_1012432410_data.0.
 | 448.31MB |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/ac3dba252a8952b8_1663177415_data.0.
 | 2.19GB   |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_text/cfb8252452445682_1868457216_data.0.
 | 17B      |           |
++---------------------------------------------------------------------------------------------+----------+-----------+
+[localhost:21000] > create table unpartitioned_parquet stored as parquet as 
select * from unpartitioned_text;
++---------------------------+
+| summary                   |
++---------------------------+
+| Inserted 120000002 row(s) |
++---------------------------+
+[localhost:21000] > show files in unpartitioned_parquet;
++----------------------------------------------------------------------------------------------------+----------+-----------+
+| path                                                                         
                      | size     | partition |
++----------------------------------------------------------------------------------------------------+----------+-----------+
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630184_549959007_data.0.parq
  | 255.36MB |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630184_549959007_data.1.parq
  | 178.52MB |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630185_549959007_data.0.parq
  | 255.37MB |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630185_549959007_data.1.parq
  | 57.71MB  |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630186_2141167244_data.0.parq
 | 255.40MB |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630186_2141167244_data.1.parq
 | 175.52MB |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630187_1006832086_data.0.parq
 | 255.40MB |           |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/unpartitioned_parquet/60798d96ba630187_1006832086_data.1.parq
 | 214.61MB |           |
++----------------------------------------------------------------------------------------------------+----------+-----------+
+</codeblock>
+
+      <p>
+        The following example shows a <codeph>SHOW FILES</codeph> statement 
for a partitioned text table
+        with data in two different partitions, and two empty partitions.
+        The partitions with no data are not represented in the <codeph>SHOW 
FILES</codeph> output.
+      </p>
+<codeblock>[localhost:21000] > create table partitioned_text (x bigint, y int, 
s string) partitioned by (year bigint, month bigint, day bigint);
+[localhost:21000] > insert overwrite partitioned_text (x, y, s) partition 
(year=2014,month=1,day=1) select id, val, name from oreilly.normalized_parquet
+where id between 1 and 1000000;
+[localhost:21000] > insert overwrite partitioned_text (x, y, s) partition 
(year=2014,month=1,day=2) select id, val, name from oreilly.normalized_parquet
+where id between 1000001 and 2000000;
+[localhost:21000] > alter table partitioned_text add partition 
(year=2014,month=1,day=3);
+[localhost:21000] > alter table partitioned_text add partition 
(year=2014,month=1,day=4);
+[localhost:21000] > show partitions partitioned_text;
++-------+-------+-----+-------+--------+---------+--------------+-------------------+--------+-------------------+
+| year  | month | day | #Rows | #Files | Size    | Bytes Cached | Cache 
Replication | Format | Incremental stats |
++-------+-------+-----+-------+--------+---------+--------------+-------------------+--------+-------------------+
+| 2014  | 1     | 1   | -1    | 4      | 25.16MB | NOT CACHED   | NOT CACHED   
     | TEXT   | false             |
+| 2014  | 1     | 2   | -1    | 4      | 26.22MB | NOT CACHED   | NOT CACHED   
     | TEXT   | false             |
+| 2014  | 1     | 3   | -1    | 0      | 0B      | NOT CACHED   | NOT CACHED   
     | TEXT   | false             |
+| 2014  | 1     | 4   | -1    | 0      | 0B      | NOT CACHED   | NOT CACHED   
     | TEXT   | false             |
+| Total |       |     | -1    | 8      | 51.38MB | 0B           |              
     |        |                   |
++-------+-------+-----+-------+--------+---------+--------------+-------------------+--------+-------------------+
+[localhost:21000] > show files in partitioned_text;
++----------------------------------------------------------------------------------------------------------------+--------+-------------------------+
+| path                                                                         
                                  | size   | partition               |
++----------------------------------------------------------------------------------------------------------------+--------+-------------------------+
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc80689f_1418645991_data.0.
  | 5.77MB | year=2014/month=1/day=1 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc8068a0_1418645991_data.0.
  | 6.25MB | year=2014/month=1/day=1 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc8068a1_147082319_data.0.
   | 7.16MB | year=2014/month=1/day=1 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=1/80732d9dc8068a2_2111411753_data.0.
  | 5.98MB | year=2014/month=1/day=1 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbb_501271652_data.0.
  | 6.42MB | year=2014/month=1/day=2 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbc_501271652_data.0.
  | 6.62MB | year=2014/month=1/day=2 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbd_1393490200_data.0.
 | 6.98MB | year=2014/month=1/day=2 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_text/year=2014/month=1/day=2/21a828cf494b5bbe_1393490200_data.0.
 | 6.20MB | year=2014/month=1/day=2 |
++----------------------------------------------------------------------------------------------------------------+--------+-------------------------+
+</codeblock>
+      <p>
+        The following example shows a <codeph>SHOW FILES</codeph> statement 
for a partitioned Parquet table.
+        The number and sizes of files are different from the equivalent 
partitioned text table
+        used in the previous example, because <codeph>INSERT</codeph> 
operations for Parquet tables
+        are parallelized differently than for text tables. (Also, the amount 
of data is so small
+        that it can be written to Parquet without involving all the hosts in 
this 4-node cluster.)
+      </p>
+<codeblock>[localhost:21000] > create table partitioned_parquet (x bigint, y 
int, s string) partitioned by (year bigint, month bigint, day bigint) stored as 
parquet;
+[localhost:21000] > insert into partitioned_parquet partition (year,month,day) 
select x, y, s, year, month, day from partitioned_text;
+[localhost:21000] > show partitions partitioned_parquet;
++-------+-------+-----+-------+--------+---------+--------------+-------------------+---------+-------------------+
+| year  | month | day | #Rows | #Files | Size    | Bytes Cached | Cache 
Replication | Format  | Incremental stats |
++-------+-------+-----+-------+--------+---------+--------------+-------------------+---------+-------------------+
+| 2014  | 1     | 1   | -1    | 3      | 17.89MB | NOT CACHED   | NOT CACHED   
     | PARQUET | false             |
+| 2014  | 1     | 2   | -1    | 3      | 17.89MB | NOT CACHED   | NOT CACHED   
     | PARQUET | false             |
+| Total |       |     | -1    | 6      | 35.79MB | 0B           |              
     |         |                   |
++-------+-------+-----+-------+--------+---------+--------------+-------------------+---------+-------------------+
+[localhost:21000] > show files in partitioned_parquet;
++---------------------------------------------------------------------------------------------------------+--------+-------------------------+
+| path                                                                         
                           | size   | partition               |
++---------------------------------------------------------------------------------------------------------+--------+-------------------------+
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=1/1134113650_data.0.parq
 | 4.49MB | year=2014/month=1/day=1 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=1/617567880_data.0.parq
  | 5.14MB | year=2014/month=1/day=1 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=1/2099499416_data.0.parq
 | 8.27MB | year=2014/month=1/day=1 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=2/945567189_data.0.parq
  | 8.80MB | year=2014/month=1/day=2 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=2/2145850112_data.0.parq
 | 4.80MB | year=2014/month=1/day=2 |
+| 
hdfs://<varname>impala_data_dir</varname>/show_files.db/partitioned_parquet/year=2014/month=1/day=2/665613448_data.0.parq
  | 4.29MB | year=2014/month=1/day=2 |
++---------------------------------------------------------------------------------------------------------+--------+-------------------------+
+</codeblock>
+<p>
+  The following example shows output from the <codeph>SHOW FILES</codeph> 
statement
+  for a table where the data files are stored in Amazon S3:
+</p>
+<codeblock>[localhost:21000] > show files in s3_testing.sample_data_s3;
++-----------------------------------------------------------------------+---------+-----------+
+| path                                                                  | size 
   | partition |
++-----------------------------------------------------------------------+---------+-----------+
+| s3a://impala-demo/sample_data/e065453cba1988a6_1733868553_data.0.parq | 
24.84MB |           |
++-----------------------------------------------------------------------+---------+-----------+
+</codeblock>
+<!--
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_authorization.xml#authorization"/>
+      </p>
+-->
+    </conbody>
+  </concept>
+
+  <concept rev="2.0.0" id="show_roles">
+
+    <title>SHOW ROLES Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Sentry"/>
+      <data name="Category" value="Roles"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        The <codeph>SHOW ROLES</codeph> statement displays roles. This syntax 
is available in CDH 5.2 and later
+        only, when you are using the Sentry authorization framework along with 
the Sentry service, as described in
+        <xref href="impala_authorization.xml#sentry_service"/>. It does not 
apply when you use the Sentry framework
+        with privileges defined in a policy file.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+      <p>
+        Depending on the roles set up within your organization by the 
<codeph>CREATE ROLE</codeph> statement, the
+        output might look something like this:
+      </p>
+
+<codeblock>show roles;
++-----------+
+| role_name |
++-----------+
+| analyst   |
+| role1     |
+| sales     |
+| superuser |
+| test_role |
++-----------+
+</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_authorization.xml#authorization"/>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept rev="2.0.0" id="show_current_role">
+
+    <title>SHOW CURRENT ROLE</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Sentry"/>
+      <data name="Category" value="Roles"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p rev="2.0.0">
+        The <codeph>SHOW CURRENT ROLE</codeph> statement displays roles 
assigned to the current user. This syntax
+        is available in CDH 5.2 and later only, when you are using the Sentry 
authorization framework along with
+        the Sentry service, as described in <xref 
href="impala_authorization.xml#sentry_service"/>. It does not
+        apply when you use the Sentry framework with privileges defined in a 
policy file.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+      <p>
+        Depending on the roles set up within your organization by the 
<codeph>CREATE ROLE</codeph> statement, the
+        output might look something like this:
+      </p>
+
+<codeblock>show current roles;
++-----------+
+| role_name |
++-----------+
+| role1     |
+| superuser |
++-----------+
+</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_authorization.xml#authorization"/>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="show_role_grant">
+
+    <title>SHOW ROLE GRANT Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Sentry"/>
+      <data name="Category" value="Roles"/>
+    </metadata>
+  </prolog>
+
+
+    <conbody>
+
+      <p rev="2.0.0">
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for 
clarity. -->
+        The <codeph>SHOW ROLE GRANT</codeph> statement lists all the roles 
assigned to the specified group. This
+        statement is only allowed for Sentry administrative users and others 
users that are part of the specified
+        group. This syntax is available in CDH 5.2 and later only, when you 
are using the Sentry authorization
+        framework along with the Sentry service, as described in
+        <xref href="impala_authorization.xml#sentry_service"/>. It does not 
apply when you use the Sentry framework
+        with privileges defined in a policy file.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+<!--
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>To do: construct example for SHOW ROLE GRANT</codeblock>
+-->
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_authorization.xml#authorization"/>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept rev="2.0.0" id="show_grant_role">
+
+    <title>SHOW GRANT ROLE Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Sentry"/>
+      <data name="Category" value="Roles"/>
+    </metadata>
+  </prolog>
+
+
+    <conbody>
+
+      <p>
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for 
clarity. -->
+        The <codeph>SHOW GRANT ROLE</codeph> statement list all the grants for 
the given role name. This statement
+        is only allowed for Sentry administrative users and other users that 
have been granted the specified role.
+        This syntax is available in CDH 5.2 and later only, when you are using 
the Sentry authorization framework
+        along with the Sentry service, as described in <xref 
href="impala_authorization.xml#sentry_service"/>. It
+        does not apply when you use the Sentry framework with privileges 
defined in a policy file.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+<!--
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>To do: construct example for SHOW GRANT ROLE</codeblock>
+-->
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_authorization.xml#authorization"/>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="show_databases">
+
+    <title>SHOW DATABASES</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Databases"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        The <codeph>SHOW DATABASES</codeph> statement is often the first one 
you issue when connecting to an
+        instance for the first time. You typically issue <codeph>SHOW 
DATABASES</codeph> to see the names you can
+        specify in a <codeph>USE <varname>db_name</varname></codeph> 
statement, then after switching to a database
+        you issue <codeph>SHOW TABLES</codeph> to see the names you can 
specify in <codeph>SELECT</codeph> and
+        <codeph>INSERT</codeph> statements.
+      </p>
+
+      <p>
+        The output of <codeph>SHOW DATABASES</codeph> includes the special 
<codeph>_impala_builtins</codeph>
+        database, which lets you view definitions of built-in functions, as 
described under <codeph>SHOW
+        FUNCTIONS</codeph>.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+      <p>
+        This example shows how you might locate a particular table on an 
unfamiliar system. The
+        <codeph>DEFAULT</codeph> database is the one you initially connect to; 
a database with that name is present
+        on every system. You can issue <codeph>SHOW TABLES IN 
<varname>db_name</varname></codeph> without going
+        into a database, or <codeph>SHOW TABLES</codeph> once you are inside a 
particular database.
+      </p>
+
+<codeblock>[localhost:21000] &gt; show databases;
++--------------------+
+| name               |
++--------------------+
+| _impala_builtins   |
+| analyze_testing    |
+| avro               |
+| ctas               |
+| d1                 |
+| d2                 |
+| d3                 |
+| default            |
+| file_formats       |
+| hbase              |
+| load_data          |
+| partitioning       |
+| regexp_testing     |
+| reports            |
+| temporary          |
++--------------------+
+Returned 14 row(s) in 0.02s
+[localhost:21000] &gt; show tables in file_formats;
++--------------------+
+| name               |
++--------------------+
+| parquet_table      |
+| rcfile_table       |
+| sequencefile_table |
+| textfile_table     |
++--------------------+
+Returned 4 row(s) in 0.01s
+[localhost:21000] &gt; use file_formats;
+[localhost:21000] &gt; show tables like '*parq*';
++--------------------+
+| name               |
++--------------------+
+| parquet_table      |
++--------------------+
+Returned 1 row(s) in 0.01s</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_databases.xml#databases"/>, <xref 
href="impala_create_database.xml#create_database"/>,
+        <xref href="impala_drop_database.xml#drop_database"/>, <xref 
href="impala_use.xml#use"/>
+        <xref href="impala_show.xml#show_tables"/>,
+        <xref href="impala_show.xml#show_functions"/>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="show_tables">
+
+    <title>SHOW TABLES Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Tables"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        Displays the names of tables. By default, lists tables in the current 
database, or with the
+        <codeph>IN</codeph> clause, in a specified database. By default, lists 
all tables, or with the
+        <codeph>LIKE</codeph> clause, only those whose name match a pattern 
with <codeph>*</codeph> wildcards.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+      <p rev="CDH-19187">
+        The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+        typically the <codeph>impala</codeph> user, must have read and execute
+        permissions for all directories that are part of the table.
+        (A table could span multiple different HDFS directories if it is 
partitioned.
+        The directories could be widely scattered because a partition can 
reside
+        in an arbitrary HDFS directory based on its <codeph>LOCATION</codeph> 
attribute.)
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+      <p>
+        The following examples demonstrate the <codeph>SHOW TABLES</codeph> 
statement.
+        If the database contains no tables, the result set is empty.
+        If the database does contain tables, <codeph>SHOW TABLES IN 
<varname>db_name</varname></codeph>
+        lists all the table names. <codeph>SHOW TABLES</codeph> with no 
qualifiers lists
+        all the table names in the current database.
+      </p>
+
+<codeblock>create database empty_db;
+show tables in empty_db;
+Fetched 0 row(s) in 0.11s
+
+create database full_db;
+create table full_db.t1 (x int);
+create table full_db.t2 like full_db.t1;
+
+show tables in full_db;
++------+
+| name |
++------+
+| t1   |
+| t2   |
++------+
+
+use full_db;
+show tables;
++------+
+| name |
++------+
+| t1   |
+| t2   |
++------+
+</codeblock>
+
+      <p>
+        This example demonstrates how <codeph>SHOW TABLES LIKE 
'<varname>wildcard_pattern</varname>'</codeph>
+        lists table names that match a pattern, or multiple alternative 
patterns.
+        The ability to do wildcard matches for table names makes it helpful to 
establish naming conventions for tables to
+        conveniently locate a group of related tables.
+      </p>
+
+<codeblock>create table fact_tbl (x int);
+create table dim_tbl_1 (s string);
+create table dim_tbl_2 (s string);
+
+/* Asterisk is the wildcard character. Only 2 out of the 3 just-created tables 
are returned. */
+show tables like 'dim*';
++-----------+
+| name      |
++-----------+
+| dim_tbl_1 |
+| dim_tbl_2 |
++-----------+
+
+/* We are already in the FULL_DB database, but just to be sure we can specify 
the database name also. */
+show tables in full_db like 'dim*';
++-----------+
+| name      |
++-----------+
+| dim_tbl_1 |
+| dim_tbl_2 |
++-----------+
+
+/* The pipe character separates multiple wildcard patterns. */
+show tables like '*dim*|t*';
++-----------+
+| name      |
++-----------+
+| dim_tbl_1 |
+| dim_tbl_2 |
+| t1        |
+| t2        |
++-----------+
+</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_tables.xml#tables"/>, <xref 
href="impala_create_table.xml#create_table"/>,
+        <xref href="impala_alter_table.xml#alter_table"/>, <xref 
href="impala_drop_table.xml#drop_table"/>,
+        <xref href="impala_describe.xml#describe"/>, <xref 
href="impala_show.xml#show_create_table"/>,
+        <xref href="impala_show.xml#show_table_stats"/>,
+        <xref href="impala_show.xml#show_databases"/>,
+        <xref href="impala_show.xml#show_functions"/>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept rev="1.2.1" id="show_create_table">
+
+    <title>SHOW CREATE TABLE Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Schemas"/>
+      <data name="Category" value="Impala Data Types"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        As a schema changes over time, you might run a <codeph>CREATE 
TABLE</codeph> statement followed by several
+        <codeph>ALTER TABLE</codeph> statements. To capture the cumulative 
effect of all those statements,
+        <codeph>SHOW CREATE TABLE</codeph> displays a <codeph>CREATE 
TABLE</codeph> statement that would reproduce
+        the current structure of a table. You can use this output in scripts 
that set up or clone a group of
+        tables, rather than trying to reproduce the original sequence of 
<codeph>CREATE TABLE</codeph> and
+        <codeph>ALTER TABLE</codeph> statements. When creating variations on 
the original table, or cloning the
+        original table on a different system, you might need to edit the 
<codeph>SHOW CREATE TABLE</codeph> output
+        to change things such as the database name, <codeph>LOCATION</codeph> 
field, and so on that might be
+        different on the destination system.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+      <p>
+        The following example shows how various clauses from the 
<codeph>CREATE TABLE</codeph> statement are
+        represented in the output of <codeph>SHOW CREATE TABLE</codeph>.
+      </p>
+
+<codeblock>create table show_create_table_demo (id int comment "Unique ID", y 
double, s string)
+  partitioned by (year smallint)
+  stored as parquet;
+
+show create table show_create_table_demo;
++----------------------------------------------------------------------------------------+
+| result                                                                       
          |
++----------------------------------------------------------------------------------------+
+| CREATE TABLE scratch.show_create_table_demo (                                
          |
+|   id INT COMMENT 'Unique ID',                                                
          |
+|   y DOUBLE,                                                                  
          |
+|   s STRING                                                                   
          |
+| )                                                                            
          |
+| PARTITIONED BY (                                                             
          |
+|   year SMALLINT                                                              
          |
+| )                                                                            
          |
+| STORED AS PARQUET                                                            
          |
+| LOCATION 
'hdfs://127.0.0.1:8020/user/hive/warehouse/scratch.db/show_create_table_demo' |
+| TBLPROPERTIES ('transient_lastDdlTime'='1418152582')                         
          |
++----------------------------------------------------------------------------------------+
+</codeblock>
+
+      <p>
+        The following example shows how, after a sequence of <codeph>ALTER 
TABLE</codeph> statements, the output
+        from <codeph>SHOW CREATE TABLE</codeph> represents the current state 
of the table. This output could be
+        used to create a matching table rather than executing the original 
<codeph>CREATE TABLE</codeph> and
+        sequence of <codeph>ALTER TABLE</codeph> statements.
+      </p>
+
+<codeblock>alter table show_create_table_demo drop column s;
+alter table show_create_table_demo set fileformat textfile;
+
+show create table show_create_table_demo;
++----------------------------------------------------------------------------------------+
+| result                                                                       
          |
++----------------------------------------------------------------------------------------+
+| CREATE TABLE scratch.show_create_table_demo (                                
          |
+|   id INT COMMENT 'Unique ID',                                                
          |
+|   y DOUBLE                                                                   
          |
+| )                                                                            
          |
+| PARTITIONED BY (                                                             
          |
+|   year SMALLINT                                                              
          |
+| )                                                                            
          |
+| STORED AS TEXTFILE                                                           
          |
+| LOCATION 
'hdfs://127.0.0.1:8020/user/hive/warehouse/demo.db/show_create_table_demo'    |
+| TBLPROPERTIES ('transient_lastDdlTime'='1418152638')                         
          |
++----------------------------------------------------------------------------------------+
+</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_create_table.xml#create_table"/>, <xref 
href="impala_describe.xml#describe"/>,
+        <xref href="impala_show.xml#show_tables"/>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="show_table_stats">
+
+    <title>SHOW TABLE STATS Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Performance"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        The <codeph>SHOW TABLE STATS</codeph> and <codeph>SHOW COLUMN 
STATS</codeph> variants are important for
+        tuning performance and diagnosing performance issues, especially with 
the largest tables and the most
+        complex join queries.
+      </p>
+
+      <p>
+        Any values that are not available (because the <codeph>COMPUTE 
STATS</codeph> statement has not been run
+        yet) are displayed as <codeph>-1</codeph>.
+      </p>
+
+      <p>
+        <codeph>SHOW TABLE STATS</codeph> provides some general information 
about the table, such as the number of
+        files, overall size of the data, whether some or all of the data is in 
the HDFS cache, and the file format,
+        that is useful whether or not you have run the <codeph>COMPUTE 
STATS</codeph> statement. A
+        <codeph>-1</codeph> in the <codeph>#Rows</codeph> output column 
indicates that the <codeph>COMPUTE
+        STATS</codeph> statement has never been run for this table. If the 
table is partitioned, <codeph>SHOW TABLE
+        STATS</codeph> provides this information for each partition. (It 
produces the same output as the
+        <codeph>SHOW PARTITIONS</codeph> statement in this case.)
+      </p>
+
+      <p>
+        The output of <codeph>SHOW COLUMN STATS</codeph> is primarily only 
useful after the <codeph>COMPUTE
+        STATS</codeph> statement has been run on the table. A 
<codeph>-1</codeph> in the <codeph>#Distinct
+        Values</codeph> output column indicates that the <codeph>COMPUTE 
STATS</codeph> statement has never been
+        run for this table. Currently, Impala always leaves the 
<codeph>#Nulls</codeph> column as
+        <codeph>-1</codeph>, even after <codeph>COMPUTE STATS</codeph> has 
been run.
+      </p>
+
+      <p>
+        These <codeph>SHOW</codeph> statements work on actual tables only, not 
on views.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+      <p>
+        The following examples show how the <codeph>SHOW TABLE STATS</codeph> 
statement displays physical
+        information about a table and the associated data files:
+      </p>
+
+<codeblock>show table stats store_sales;
++-------+--------+----------+--------------+--------+-------------------+
+| #Rows | #Files | Size     | Bytes Cached | Format | Incremental stats |
++-------+--------+----------+--------------+--------+-------------------+
+| -1    | 1      | 370.45MB | NOT CACHED   | TEXT   | false             |
++-------+--------+----------+--------------+--------+-------------------+
+
+show table stats customer;
++-------+--------+---------+--------------+--------+-------------------+
+| #Rows | #Files | Size    | Bytes Cached | Format | Incremental stats |
++-------+--------+---------+--------------+--------+-------------------+
+| -1    | 1      | 12.60MB | NOT CACHED   | TEXT   | false             |
++-------+--------+---------+--------------+--------+-------------------+
+</codeblock>
+
+      <p>
+        The following example shows how, after a <codeph>COMPUTE 
STATS</codeph> or <codeph>COMPUTE INCREMENTAL
+        STATS</codeph> statement, the <codeph>#Rows</codeph> field is now 
filled in. Because the
+        <codeph>STORE_SALES</codeph> table in this example is not partitioned, 
the <codeph>COMPUTE INCREMENTAL
+        STATS</codeph> statement produces regular stats rather than 
incremental stats, therefore the
+        <codeph>Incremental stats</codeph> field remains 
<codeph>false</codeph>.
+      </p>
+
+<codeblock>compute stats customer;
++------------------------------------------+
+| summary                                  |
++------------------------------------------+
+| Updated 1 partition(s) and 18 column(s). |
++------------------------------------------+
+
+show table stats customer;
++--------+--------+---------+--------------+--------+-------------------+
+| #Rows  | #Files | Size    | Bytes Cached | Format | Incremental stats |
++--------+--------+---------+--------------+--------+-------------------+
+| 100000 | 1      | 12.60MB | NOT CACHED   | TEXT   | false             |
++--------+--------+---------+--------------+--------+-------------------+
+
+compute incremental stats store_sales;
++------------------------------------------+
+| summary                                  |
++------------------------------------------+
+| Updated 1 partition(s) and 23 column(s). |
++------------------------------------------+
+
+show table stats store_sales;
++---------+--------+----------+--------------+--------+-------------------+
+| #Rows   | #Files | Size     | Bytes Cached | Format | Incremental stats |
++---------+--------+----------+--------------+--------+-------------------+
+| 2880404 | 1      | 370.45MB | NOT CACHED   | TEXT   | false             |
++---------+--------+----------+--------------+--------+-------------------+
+</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+      <p rev="CDH-19187">
+        The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+        typically the <codeph>impala</codeph> user, must have read and execute
+        permissions for all directories that are part of the table.
+        (A table could span multiple different HDFS directories if it is 
partitioned.
+        The directories could be widely scattered because a partition can 
reside
+        in an arbitrary HDFS directory based on its <codeph>LOCATION</codeph> 
attribute.)
+        The Impala user must also have execute
+        permission for the database directory, and any parent directories of 
the database directory in HDFS.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_compute_stats.xml#compute_stats"/>, <xref 
href="impala_show.xml#show_column_stats"/>
+      </p>
+
+      <p>
+        See <xref href="impala_perf_stats.xml#perf_stats"/> for usage 
information and examples.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="show_column_stats">
+
+    <title>SHOW COLUMN STATS Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Performance"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        The <codeph>SHOW TABLE STATS</codeph> and <codeph>SHOW COLUMN 
STATS</codeph> variants are important for
+        tuning performance and diagnosing performance issues, especially with 
the largest tables and the most
+        complex join queries.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+      <p>
+        The following examples show the output of the <codeph>SHOW COLUMN 
STATS</codeph> statement for some tables,
+        before the <codeph>COMPUTE STATS</codeph> statement is run. Impala 
deduces some information, such as
+        maximum and average size for fixed-length columns, and leaves and 
unknown values as <codeph>-1</codeph>.
+      </p>
+
+<codeblock>show column stats customer;
++------------------------+--------+------------------+--------+----------+----------+
+| Column                 | Type   | #Distinct Values | #Nulls | Max Size | Avg 
Size |
++------------------------+--------+------------------+--------+----------+----------+
+| c_customer_sk          | INT    | -1               | -1     | 4        | 4   
     |
+| c_customer_id          | STRING | -1               | -1     | -1       | -1  
     |
+| c_current_cdemo_sk     | INT    | -1               | -1     | 4        | 4   
     |
+| c_current_hdemo_sk     | INT    | -1               | -1     | 4        | 4   
     |
+| c_current_addr_sk      | INT    | -1               | -1     | 4        | 4   
     |
+| c_first_shipto_date_sk | INT    | -1               | -1     | 4        | 4   
     |
+| c_first_sales_date_sk  | INT    | -1               | -1     | 4        | 4   
     |
+| c_salutation           | STRING | -1               | -1     | -1       | -1  
     |
+| c_first_name           | STRING | -1               | -1     | -1       | -1  
     |
+| c_last_name            | STRING | -1               | -1     | -1       | -1  
     |
+| c_preferred_cust_flag  | STRING | -1               | -1     | -1       | -1  
     |
+| c_birth_day            | INT    | -1               | -1     | 4        | 4   
     |
+| c_birth_month          | INT    | -1               | -1     | 4        | 4   
     |
+| c_birth_year           | INT    | -1               | -1     | 4        | 4   
     |
+| c_birth_country        | STRING | -1               | -1     | -1       | -1  
     |
+| c_login                | STRING | -1               | -1     | -1       | -1  
     |
+| c_email_address        | STRING | -1               | -1     | -1       | -1  
     |
+| c_last_review_date     | STRING | -1               | -1     | -1       | -1  
     |
++------------------------+--------+------------------+--------+----------+----------+
+
+show column stats store_sales;
++-----------------------+-------+------------------+--------+----------+----------+
+| Column                | Type  | #Distinct Values | #Nulls | Max Size | Avg 
Size |
++-----------------------+-------+------------------+--------+----------+----------+
+| ss_sold_date_sk       | INT   | -1               | -1     | 4        | 4     
   |
+| ss_sold_time_sk       | INT   | -1               | -1     | 4        | 4     
   |
+| ss_item_sk            | INT   | -1               | -1     | 4        | 4     
   |
+| ss_customer_sk        | INT   | -1               | -1     | 4        | 4     
   |
+| ss_cdemo_sk           | INT   | -1               | -1     | 4        | 4     
   |
+| ss_hdemo_sk           | INT   | -1               | -1     | 4        | 4     
   |
+| ss_addr_sk            | INT   | -1               | -1     | 4        | 4     
   |
+| ss_store_sk           | INT   | -1               | -1     | 4        | 4     
   |
+| ss_promo_sk           | INT   | -1               | -1     | 4        | 4     
   |
+| ss_ticket_number      | INT   | -1               | -1     | 4        | 4     
   |
+| ss_quantity           | INT   | -1               | -1     | 4        | 4     
   |
+| ss_wholesale_cost     | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_list_price         | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_sales_price        | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_ext_discount_amt   | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_ext_sales_price    | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_ext_wholesale_cost | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_ext_list_price     | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_ext_tax            | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_coupon_amt         | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_net_paid           | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_net_paid_inc_tax   | FLOAT | -1               | -1     | 4        | 4     
   |
+| ss_net_profit         | FLOAT | -1               | -1     | 4        | 4     
   |
++-----------------------+-------+------------------+--------+----------+----------+
+</codeblock>
+
+      <p>
+        The following examples show the output of the <codeph>SHOW COLUMN 
STATS</codeph> statement for some tables,
+        after the <codeph>COMPUTE STATS</codeph> statement is run. Now most of 
the <codeph>-1</codeph> values are
+        changed to reflect the actual table data. The <codeph>#Nulls</codeph> 
column remains <codeph>-1</codeph>
+        because Impala does not use the number of <codeph>NULL</codeph> values 
to influence query planning.
+      </p>
+
+<codeblock>compute stats customer;
++------------------------------------------+
+| summary                                  |
++------------------------------------------+
+| Updated 1 partition(s) and 18 column(s). |
++------------------------------------------+
+
+compute stats store_sales;
++------------------------------------------+
+| summary                                  |
++------------------------------------------+
+| Updated 1 partition(s) and 23 column(s). |
++------------------------------------------+
+
+show column stats customer;
++------------------------+--------+------------------+--------+----------+--------+
+| Column                 | Type   | #Distinct Values | #Nulls | Max Size | Avg 
Size
++------------------------+--------+------------------+--------+----------+--------+
+| c_customer_sk          | INT    | 139017           | -1     | 4        | 4   
   |
+| c_customer_id          | STRING | 111904           | -1     | 16       | 16  
   |
+| c_current_cdemo_sk     | INT    | 95837            | -1     | 4        | 4   
   |
+| c_current_hdemo_sk     | INT    | 8097             | -1     | 4        | 4   
   |
+| c_current_addr_sk      | INT    | 57334            | -1     | 4        | 4   
   |
+| c_first_shipto_date_sk | INT    | 4374             | -1     | 4        | 4   
   |
+| c_first_sales_date_sk  | INT    | 4409             | -1     | 4        | 4   
   |
+| c_salutation           | STRING | 7                | -1     | 4        | 
3.1308 |
+| c_first_name           | STRING | 3887             | -1     | 11       | 
5.6356 |
+| c_last_name            | STRING | 4739             | -1     | 13       | 
5.9106 |
+| c_preferred_cust_flag  | STRING | 3                | -1     | 1        | 
0.9656 |
+| c_birth_day            | INT    | 31               | -1     | 4        | 4   
   |
+| c_birth_month          | INT    | 12               | -1     | 4        | 4   
   |
+| c_birth_year           | INT    | 71               | -1     | 4        | 4   
   |
+| c_birth_country        | STRING | 205              | -1     | 20       | 
8.4001 |
+| c_login                | STRING | 1                | -1     | 0        | 0   
   |
+| c_email_address        | STRING | 94492            | -1     | 46       | 
26.485 |
+| c_last_review_date     | STRING | 349              | -1     | 7        | 
6.7561 |
++------------------------+--------+------------------+--------+----------+--------+
+
+show column stats store_sales;
++-----------------------+-------+------------------+--------+----------+----------+
+| Column                | Type  | #Distinct Values | #Nulls | Max Size | Avg 
Size |
++-----------------------+-------+------------------+--------+----------+----------+
+| ss_sold_date_sk       | INT   | 4395             | -1     | 4        | 4     
   |
+| ss_sold_time_sk       | INT   | 63617            | -1     | 4        | 4     
   |
+| ss_item_sk            | INT   | 19463            | -1     | 4        | 4     
   |
+| ss_customer_sk        | INT   | 122720           | -1     | 4        | 4     
   |
+| ss_cdemo_sk           | INT   | 242982           | -1     | 4        | 4     
   |
+| ss_hdemo_sk           | INT   | 8097             | -1     | 4        | 4     
   |
+| ss_addr_sk            | INT   | 70770            | -1     | 4        | 4     
   |
+| ss_store_sk           | INT   | 6                | -1     | 4        | 4     
   |
+| ss_promo_sk           | INT   | 355              | -1     | 4        | 4     
   |
+| ss_ticket_number      | INT   | 304098           | -1     | 4        | 4     
   |
+| ss_quantity           | INT   | 105              | -1     | 4        | 4     
   |
+| ss_wholesale_cost     | FLOAT | 9600             | -1     | 4        | 4     
   |
+| ss_list_price         | FLOAT | 22191            | -1     | 4        | 4     
   |
+| ss_sales_price        | FLOAT | 20693            | -1     | 4        | 4     
   |
+| ss_ext_discount_amt   | FLOAT | 228141           | -1     | 4        | 4     
   |
+| ss_ext_sales_price    | FLOAT | 433550           | -1     | 4        | 4     
   |
+| ss_ext_wholesale_cost | FLOAT | 406291           | -1     | 4        | 4     
   |
+| ss_ext_list_price     | FLOAT | 574871           | -1     | 4        | 4     
   |
+| ss_ext_tax            | FLOAT | 91806            | -1     | 4        | 4     
   |
+| ss_coupon_amt         | FLOAT | 228141           | -1     | 4        | 4     
   |
+| ss_net_paid           | FLOAT | 493107           | -1     | 4        | 4     
   |
+| ss_net_paid_inc_tax   | FLOAT | 653523           | -1     | 4        | 4     
   |
+| ss_net_profit         | FLOAT | 611934           | -1     | 4        | 4     
   |
++-----------------------+-------+------------------+--------+----------+----------+
+</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+      <p rev="CDH-19187">
+        The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+        typically the <codeph>impala</codeph> user, must have read and execute
+        permissions for all directories that are part of the table.
+        (A table could span multiple different HDFS directories if it is 
partitioned.
+        The directories could be widely scattered because a partition can 
reside
+        in an arbitrary HDFS directory based on its <codeph>LOCATION</codeph> 
attribute.)
+        The Impala user must also have execute
+        permission for the database directory, and any parent directories of 
the database directory in HDFS.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_compute_stats.xml#compute_stats"/>, <xref 
href="impala_show.xml#show_table_stats"/>
+      </p>
+
+      <p>
+        See <xref href="impala_perf_stats.xml#perf_stats"/> for usage 
information and examples.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept rev="1.4.0" id="show_partitions">
+
+    <title>SHOW PARTITIONS Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Schemas"/>
+      <!-- At some point, need to figure out categories related to 
partitioning. (Partitioned Tables etc.) -->
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        <codeph>SHOW PARTITIONS</codeph> displays information about each 
partition for a partitioned table. (The
+        output is the same as the <codeph>SHOW TABLE STATS</codeph> statement, 
but <codeph>SHOW PARTITIONS</codeph>
+        only works on a partitioned table.) Because it displays table 
statistics for all partitions, the output is
+        more informative if you have run the <codeph>COMPUTE STATS</codeph> 
statement after creating all the
+        partitions. See <xref href="impala_compute_stats.xml#compute_stats"/> 
for details. For example, on a
+        <codeph>CENSUS</codeph> table partitioned on the <codeph>YEAR</codeph> 
column:
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock rev="1.4.0">[localhost:21000] &gt; show partitions census;
++-------+-------+--------+------+---------+
+| year  | #Rows | #Files | Size | Format  |
++-------+-------+--------+------+---------+
+| 2000  | -1    | 0      | 0B   | TEXT    |
+| 2004  | -1    | 0      | 0B   | TEXT    |
+| 2008  | -1    | 0      | 0B   | TEXT    |
+| 2010  | -1    | 0      | 0B   | TEXT    |
+| 2011  | 4     | 1      | 22B  | TEXT    |
+| 2012  | 4     | 1      | 22B  | TEXT    |
+| 2013  | 1     | 1      | 231B | PARQUET |
+| Total | 9     | 3      | 275B |         |
++-------+-------+--------+------+---------+
+</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+      <p rev="CDH-19187">
+        The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+        typically the <codeph>impala</codeph> user, must have read and execute
+        permissions for all directories that are part of the table.
+        (A table could span multiple different HDFS directories if it is 
partitioned.
+        The directories could be widely scattered because a partition can 
reside
+        in an arbitrary HDFS directory based on its <codeph>LOCATION</codeph> 
attribute.)
+        The Impala user must also have execute
+        permission for the database directory, and any parent directories of 
the database directory in HDFS.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        See <xref href="impala_perf_stats.xml#perf_stats"/> for usage 
information and examples.
+      </p>
+
+      <p>
+        <xref href="impala_show.xml#show_table_stats"/>, <xref 
href="impala_partitioning.xml#partitioning"/>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept rev="1.3.0" id="show_functions">
+
+    <title>SHOW FUNCTIONS Statement</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="UDFs"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        By default, <codeph>SHOW FUNCTIONS</codeph> displays user-defined 
functions (UDFs) and <codeph>SHOW
+        AGGREGATE FUNCTIONS</codeph> displays user-defined aggregate functions 
(UDAFs) associated with a particular
+        database. The output from <codeph>SHOW FUNCTIONS</codeph> includes the 
argument signature of each function.
+        You specify this argument signature as part of the <codeph>DROP 
FUNCTION</codeph> statement. You might have
+        several UDFs with the same name, each accepting different argument 
data types.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
+      <p conref="../shared/impala_common.xml#common/show_security"/>
+
+      <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+      <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+      <p>
+        To display Impala built-in functions, specify the special database 
name <codeph>_impala_builtins</codeph>:
+      </p>
+
+<codeblock>show functions in _impala_builtins;
++----------------+----------------------------------------+
+| return type    | signature                              |
++----------------+----------------------------------------+
+| BOOLEAN        | ifnull(BOOLEAN, BOOLEAN)               |
+| TINYINT        | ifnull(TINYINT, TINYINT)               |
+| SMALLINT       | ifnull(SMALLINT, SMALLINT)             |
+| INT            | ifnull(INT, INT)                       |
+...
+
+show functions in _impala_builtins like '*week*';
++-------------+------------------------------+
+| return type | signature                    |
++-------------+------------------------------+
+| INT         | weekofyear(TIMESTAMP)        |
+| TIMESTAMP   | weeks_add(TIMESTAMP, INT)    |
+| TIMESTAMP   | weeks_add(TIMESTAMP, BIGINT) |
+| TIMESTAMP   | weeks_sub(TIMESTAMP, INT)    |
+| TIMESTAMP   | weeks_sub(TIMESTAMP, BIGINT) |
+| INT         | dayofweek(TIMESTAMP)         |
++-------------+------------------------------+
+</codeblock>
+
+      <p>
+        To search for functions that use a particular data type, specify a 
case-sensitive data type name in all
+        capitals:
+      </p>
+
+<codeblock>show functions in _impala_builtins like '*BIGINT*';
++----------------------------------------+
+| name                                   |
++----------------------------------------+
+| adddate(TIMESTAMP, BIGINT)             |
+| bin(BIGINT)                            |
+| coalesce(BIGINT...)                    |
+...</codeblock>
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p>
+        <xref href="impala_functions_overview.xml#functions"/>, <xref 
href="impala_functions.xml#builtins"/>,
+        <xref href="impala_udf.xml#udfs"/>,
+        <xref href="impala_show.xml#show_databases"/>,
+        <xref href="impala_show.xml#show_tables"/>
+      </p>
+    </conbody>
+  </concept>
+
+  <concept rev="someday" audience="Cloudera" id="show_data_sources">
+
+    <title>SHOW DATA SOURCES Statement (CDH x.y and later only)</title>
+
+    <conbody>
+
+      <p>
+        <codeph>SHOW DATA SOURCES</codeph> lists the external data sources 
defined by the <codeph>CREATE DATA
+        SOURCE</codeph> statement. To show only those names matching a 
pattern, use the <codeph>LIKE</codeph>
+        clause with asterisks for wildcards, for example <codeph>SHOW DATA 
SOURCES LIKE '*sql*'</codeph>. These
+        data sources are global, not associated with a specific Impala 
database, so there is no <codeph>IN</codeph>
+        clause as in most other kinds of objects.
+      </p>
+
+<!--
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>To do: construct example for SHOW DATA SOURCES when that statement 
is externalized</codeblock>
+-->
+
+      <p conref="../shared/impala_common.xml#common/related_info"/>
+
+      <p></p>
+    </conbody>
+  </concept>
+</concept>


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_smallint.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_smallint.xml b/docs/topics/impala_smallint.xml
new file mode 100644
index 0000000..3aae9ad
--- /dev/null
+++ b/docs/topics/impala_smallint.xml
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="smallint">
+
+  <title>SMALLINT Data Type</title>
+  <titlealts><navtitle>SMALLINT</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      A 2-byte integer data type used in <codeph>CREATE TABLE</codeph> and 
<codeph>ALTER TABLE</codeph> statements.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+    <p>
+      In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+    </p>
+
+<codeblock><varname>column_name</varname> SMALLINT</codeblock>
+
+    <p>
+      <b>Range:</b> -32768 .. 32767. There is no <codeph>UNSIGNED</codeph> 
subtype.
+    </p>
+
+    <p>
+      <b>Conversions:</b> Impala automatically converts to a larger integer 
type (<codeph>INT</codeph> or
+      <codeph>BIGINT</codeph>) or a floating-point type 
(<codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph>)
+      automatically. Use <codeph>CAST()</codeph> to convert to 
<codeph>TINYINT</codeph>, <codeph>STRING</codeph>,
+      or <codeph>TIMESTAMP</codeph>.
+      <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+    <p>
+      For a convenient and automated way to check the bounds of the 
<codeph>SMALLINT</codeph> type, call the
+      functions <codeph>MIN_SMALLINT()</codeph> and 
<codeph>MAX_SMALLINT()</codeph>.
+    </p>
+
+    <p>
+      If an integer value is too large to be represented as a 
<codeph>SMALLINT</codeph>, use an
+      <codeph>INT</codeph> instead.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/null_bad_numeric_cast"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x SMALLINT);
+SELECT CAST(1000 AS SMALLINT);
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+
+<!-- Duplicated under TINYINT and SMALLINT. Turn into a conref in both places. 
-->
+
+    <p rev="1.4.0">
+      Physically, Parquet files represent <codeph>TINYINT</codeph> and 
<codeph>SMALLINT</codeph> values as 32-bit
+      integers. Although Impala rejects attempts to insert out-of-range values 
into such columns, if you create a
+      new table with the <codeph>CREATE TABLE ... LIKE PARQUET</codeph> 
syntax, any <codeph>TINYINT</codeph> or
+      <codeph>SMALLINT</codeph> columns in the original table turn into 
<codeph>INT</codeph> columns in the new
+      table.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/partitioning_good"/>
+
+    <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+    <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p 
conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+    <p conref="../shared/impala_common.xml#common/internals_2_bytes"/>
+
+    <p conref="../shared/impala_common.xml#common/added_forever"/>
+
+    <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+<!-- <p 
conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_literals.xml#numeric_literals"/>, <xref 
href="impala_tinyint.xml#tinyint"/>,
+      <xref href="impala_smallint.xml#smallint"/>, <xref 
href="impala_int.xml#int"/>,
+      <xref href="impala_bigint.xml#bigint"/>, <xref 
href="impala_decimal.xml#decimal"/>,
+      <xref href="impala_math_functions.xml#math_functions"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_stddev.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_stddev.xml b/docs/topics/impala_stddev.xml
new file mode 100644
index 0000000..0cdff45
--- /dev/null
+++ b/docs/topics/impala_stddev.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.4" id="stddev">
+
+  <title>STDDEV, STDDEV_SAMP, STDDEV_POP Functions</title>
+  <titlealts><navtitle>STDDEV, STDDEV_SAMP, STDDEV_POP</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Querying"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">stddev() function</indexterm>
+      <indexterm audience="Cloudera">stddev_samp() function</indexterm>
+      <indexterm audience="Cloudera">stddev_pop() function</indexterm>
+      An aggregate function that
+      <xref href="http://en.wikipedia.org/wiki/Standard_deviation"; 
scope="external" format="html">standard
+      deviation</xref> of a set of numbers.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>{ STDDEV | STDDEV_SAMP | STDDEV_POP } ([DISTINCT | ALL] 
<varname>expression</varname>)</codeblock>
+
+    <p>
+      This function works with any numeric data type.
+    </p>
+
+    <p 
conref="../shared/impala_common.xml#common/former_odd_return_type_string"/>
+
+    <p>
+      This function is typically used in mathematical formulas related to 
probability distributions.
+    </p>
+
+    <p>
+      The <codeph>STDDEV_POP()</codeph> and <codeph>STDDEV_SAMP()</codeph> 
functions compute the population
+      standard deviation and sample standard deviation, respectively, of the 
input values.
+      (<codeph>STDDEV()</codeph> is an alias for 
<codeph>STDDEV_SAMP()</codeph>.) Both functions evaluate all input
+      rows matched by the query. The difference is that 
<codeph>STDDEV_SAMP()</codeph> is scaled by
+      <codeph>1/(N-1)</codeph> while <codeph>STDDEV_POP()</codeph> is scaled 
by <codeph>1/N</codeph>.
+    </p>
+
+    <p>
+      If no input rows match the query, the result of any of these functions 
is <codeph>NULL</codeph>. If a single
+      input row matches the query, the result of any of these functions is 
<codeph>"0.0"</codeph>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      This example demonstrates how <codeph>STDDEV()</codeph> and 
<codeph>STDDEV_SAMP()</codeph> return the same
+      result, while <codeph>STDDEV_POP()</codeph> uses a slightly different 
calculation to reflect that the input
+      data is considered part of a larger <q>population</q>.
+    </p>
+
+<codeblock>[localhost:21000] &gt; select stddev(score) from test_scores;
++---------------+
+| stddev(score) |
++---------------+
+| 28.5          |
++---------------+
+[localhost:21000] &gt; select stddev_samp(score) from test_scores;
++--------------------+
+| stddev_samp(score) |
++--------------------+
+| 28.5               |
++--------------------+
+[localhost:21000] &gt; select stddev_pop(score) from test_scores;
++-------------------+
+| stddev_pop(score) |
++-------------------+
+| 28.4858           |
++-------------------+
+</codeblock>
+
+    <p>
+      This example demonstrates that, because the return value of these 
aggregate functions is a
+      <codeph>STRING</codeph>, you must currently convert the result with 
<codeph>CAST</codeph>.
+    </p>
+
+<codeblock>[localhost:21000] &gt; create table score_stats as select 
cast(stddev(score) as decimal(7,4)) `standard_deviation`, cast(variance(score) 
as decimal(7,4)) `variance` from test_scores;
++-------------------+
+| summary           |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+[localhost:21000] &gt; desc score_stats;
++--------------------+--------------+---------+
+| name               | type         | comment |
++--------------------+--------------+---------+
+| standard_deviation | decimal(7,4) |         |
+| variance           | decimal(7,4) |         |
++--------------------+--------------+---------+
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <p 
conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      The <codeph>STDDEV()</codeph>, <codeph>STDDEV_POP()</codeph>, and 
<codeph>STDDEV_SAMP()</codeph> functions
+      compute the standard deviation (square root of the variance) based on 
the results of
+      <codeph>VARIANCE()</codeph>, <codeph>VARIANCE_POP()</codeph>, and 
<codeph>VARIANCE_SAMP()</codeph>
+      respectively. See <xref href="impala_variance.xml#variance"/> for 
details about the variance property.
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_string.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string.xml b/docs/topics/impala_string.xml
new file mode 100644
index 0000000..9ad77c3
--- /dev/null
+++ b/docs/topics/impala_string.xml
@@ -0,0 +1,161 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="string">
+
+  <title>STRING Data Type</title>
+  <titlealts><navtitle>STRING</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      A data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER 
TABLE</codeph> statements.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+    <p>
+      In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+    </p>
+
+<codeblock><varname>column_name</varname> STRING</codeblock>
+
+    <p>
+      <b>Length:</b> Maximum of 32,767 bytes. Do not use any length constraint 
when declaring
+      <codeph>STRING</codeph> columns, as you might be familiar with from 
<codeph>VARCHAR</codeph>,
+      <codeph>CHAR</codeph>, or similar column types from relational database 
systems. <ph rev="2.0.0">If you do
+      need to manipulate string values with precise or maximum lengths, in 
Impala 2.0 and higher you can declare
+      columns as <codeph>VARCHAR(<varname>max_length</varname>)</codeph> or
+      <codeph>CHAR(<varname>length</varname>)</codeph>, but for best 
performance use <codeph>STRING</codeph>
+      where practical.</ph>
+    </p>
+
+    <p>
+      <b>Character sets:</b> For full support in all Impala subsystems, 
restrict string values to the ASCII
+      character set. UTF-8 character data can be stored in Impala and 
retrieved through queries, but UTF-8 strings
+      containing non-ASCII characters are not guaranteed to work properly with 
string manipulation functions,
+      comparison operators, or the <codeph>ORDER BY</codeph> clause. For any 
national language aspects such as
+      collation order or interpreting extended ASCII variants such as 
ISO-8859-1 or ISO-8859-2 encodings, Impala
+      does not include such metadata with the table definition. If you need to 
sort, manipulate, or display data
+      depending on those national language characteristics of string data, use 
logic on the application side.
+    </p>
+
+    <p>
+      <b>Conversions:</b>
+    </p>
+
+    <ul>
+      <li>
+        <p>
+          Impala does not automatically convert <codeph>STRING</codeph> to any 
numeric type. Impala does
+          automatically convert <codeph>STRING</codeph> to 
<codeph>TIMESTAMP</codeph> if the value matches one of
+          the accepted <codeph>TIMESTAMP</codeph> formats; see <xref 
href="impala_timestamp.xml#timestamp"/> for
+          details.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          You can use <codeph>CAST()</codeph> to convert 
<codeph>STRING</codeph> values to
+          <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, 
<codeph>INT</codeph>, <codeph>BIGINT</codeph>,
+          <codeph>FLOAT</codeph>, <codeph>DOUBLE</codeph>, or 
<codeph>TIMESTAMP</codeph>.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          You cannot directly cast a <codeph>STRING</codeph> value to 
<codeph>BOOLEAN</codeph>. You can use a
+          <codeph>CASE</codeph> expression to evaluate string values such as 
<codeph>'T'</codeph>,
+          <codeph>'true'</codeph>, and so on and return Boolean 
<codeph>true</codeph> and <codeph>false</codeph>
+          values as appropriate.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          You can cast a <codeph>BOOLEAN</codeph> value to 
<codeph>STRING</codeph>, returning <codeph>'1'</codeph>
+          for <codeph>true</codeph> values and <codeph>'0'</codeph> for 
<codeph>false</codeph> values.
+        </p>
+      </li>
+    </ul>
+
+    <p conref="../shared/impala_common.xml#common/partitioning_blurb"/>
+
+    <p>
+      Although it might be convenient to use <codeph>STRING</codeph> columns 
for partition keys, even when those
+      columns contain numbers, for performance and scalability it is much 
better to use numeric columns as
+      partition keys whenever practical. Although the underlying HDFS 
directory name might be the same in either
+      case, the in-memory storage for the partition key columns is more 
compact, and computations are faster, if
+      partition key columns such as <codeph>YEAR</codeph>, 
<codeph>MONTH</codeph>, <codeph>DAY</codeph> and so on
+      are declared as <codeph>INT</codeph>, <codeph>SMALLINT</codeph>, and so 
on.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/zero_length_strings"/>
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/hbase_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/parquet_blurb"/> -->
+
+    <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p 
conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/internals_blurb"/> 
-->
+
+<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> -->
+
+    <p conref="../shared/impala_common.xml#common/column_stats_variable"/>
+
+<!-- <p 
conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      The following examples demonstrate double-quoted and single-quoted 
string literals, and required escaping for
+      quotation marks within string literals:
+    </p>
+
+<codeblock>SELECT 'I am a single-quoted string';
+SELECT "I am a double-quoted string";
+SELECT 'I\'m a single-quoted string with an apostrophe';
+SELECT "I\'m a double-quoted string with an apostrophe";
+SELECT 'I am a "short" single-quoted string containing quotes';
+SELECT "I am a \"short\" double-quoted string containing quotes";
+</codeblock>
+
+    <p>
+      The following examples demonstrate calls to string manipulation 
functions to concatenate strings, convert
+      numbers to strings, or pull out substrings:
+    </p>
+
+<codeblock>SELECT CONCAT("Once upon a time, there were ", CAST(3 AS STRING), ' 
little pigs.');
+SELECT SUBSTR("hello world",7,5);
+</codeblock>
+
+    <p>
+      The following examples show how to perform operations on 
<codeph>STRING</codeph> columns within a table:
+    </p>
+
+<codeblock>CREATE TABLE t1 (s1 STRING, s2 STRING);
+INSERT INTO t1 VALUES ("hello", 'world'), (CAST(7 AS STRING), "wonders");
+SELECT s1, s2, length(s1) FROM t1 WHERE s2 LIKE 'w%';
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_literals.xml#string_literals"/>, <xref 
href="impala_char.xml#char"/>,
+      <xref href="impala_varchar.xml#varchar"/>, <xref 
href="impala_string_functions.xml#string_functions"/>,
+      <xref href="impala_datetime_functions.xml#datetime_functions"/>
+    </p>
+  </conbody>
+</concept>

[05/22] incubator-impala git commit: First try at porting over the source files necessary for the Impala SQL Reference.

Reply via email to