[16/22] incubator-impala git commit: First try at porting over the source files necessary for the Impala SQL Reference.

jrussell Tue, 26 Jul 2016 16:05:54 -0700

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_compression_codec.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_compression_codec.xml 
b/docs/topics/impala_compression_codec.xml
new file mode 100644
index 0000000..d99ac04
--- /dev/null
+++ b/docs/topics/impala_compression_codec.xml
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="compression_codec">
+
+  <title>COMPRESSION_CODEC Query Option</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Compression"/>
+      <data name="Category" value="File Formats"/>
+      <data name="Category" value="Parquet"/>
+      <data name="Category" value="Snappy"/>
+      <data name="Category" value="GZip"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+<!-- The initial part of this paragraph is copied straight from the 
#parquet_compression topic. -->
+
+<!-- Could turn into a conref. -->
+
+    <p>
+      <indexterm audience="Cloudera">COMPRESSION_CODEC query option</indexterm>
+      When Impala writes Parquet data files using the <codeph>INSERT</codeph> 
statement, the underlying compression
+      is controlled by the <codeph>COMPRESSION_CODEC</codeph> query option.
+    </p>
+
+    <note>
+      Prior to Impala 2.0, this option was named 
<codeph>PARQUET_COMPRESSION_CODEC</codeph>. In Impala 2.0 and
+      later, the <codeph>PARQUET_COMPRESSION_CODEC</codeph> name is not 
recognized. Use the more general name
+      <codeph>COMPRESSION_CODEC</codeph> for new code.
+    </note>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SET COMPRESSION_CODEC=<varname>codec_name</varname>;</codeblock>
+
+    <p>
+      The allowed values for this query option are <codeph>SNAPPY</codeph> 
(the default), <codeph>GZIP</codeph>,
+      and <codeph>NONE</codeph>.
+    </p>
+
+    <note>
+      A Parquet file created with <codeph>COMPRESSION_CODEC=NONE</codeph> is 
still typically smaller than the
+      original data, due to encoding schemes such as run-length encoding and 
dictionary encoding that are applied
+      separately from compression.
+    </note>
+
+    <p></p>
+
+    <p>
+      The option value is not case-sensitive.
+    </p>
+
+    <p>
+      If the option is set to an unrecognized value, all kinds of queries will 
fail due to the invalid option
+      setting, not just queries involving Parquet tables. (The value 
<codeph>BZIP2</codeph> is also recognized, but
+      is not compatible with Parquet tables.)
+    </p>
+
+    <p>
+      <b>Type:</b> <codeph>STRING</codeph>
+    </p>
+
+    <p>
+      <b>Default:</b> SNAPPY
+    </p>
+
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>set compression_codec=gzip;
+insert into parquet_table_highly_compressed select * from t1;
+
+set compression_codec=snappy;
+insert into parquet_table_compression_plus_fast_queries select * from t1;
+
+set compression_codec=none;
+insert into parquet_table_no_compression select * from t1;
+
+set compression_codec=foo;
+select * from t1 limit 5;
+ERROR: Invalid compression codec: foo
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      For information about how compressing Parquet data files affects query 
performance, see
+      <xref href="impala_parquet.xml#parquet_compression"/>.
+    </p>
+  </conbody>
+</concept>


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_compute_stats.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_compute_stats.xml 
b/docs/topics/impala_compute_stats.xml
new file mode 100644
index 0000000..abf6645
--- /dev/null
+++ b/docs/topics/impala_compute_stats.xml
@@ -0,0 +1,418 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2.2" id="compute_stats">
+
+  <title>COMPUTE STATS Statement</title>
+  <titlealts><navtitle>COMPUTE STATS</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Scalability"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Tables"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">COMPUTE STATS statement</indexterm>
+      Gathers information about volume and distribution of data in a table and 
all associated columns and
+      partitions. The information is stored in the metastore database, and 
used by Impala to help optimize queries.
+      For example, if Impala can determine that a table is large or small, or 
has many or few distinct values it
+      can organize parallelize the work appropriately for a join query or 
insert operation. For details about the
+      kinds of information gathered by this statement, see <xref 
href="impala_perf_stats.xml#perf_stats"/>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock rev="2.1.0">COMPUTE STATS 
[<varname>db_name</varname>.]<varname>table_name</varname>
+COMPUTE INCREMENTAL STATS 
[<varname>db_name</varname>.]<varname>table_name</varname> [PARTITION 
(<varname>partition_spec</varname>)]
+
+<varname>partition_spec</varname> ::= 
<varname>partition_col</varname>=<varname>constant_value</varname>
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/incremental_partition_spec"/>
+
+    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+    <p>
+      Originally, Impala relied on users to run the Hive <codeph>ANALYZE 
TABLE</codeph> statement, but that method
+      of gathering statistics proved unreliable and difficult to use. The 
Impala <codeph>COMPUTE STATS</codeph>
+      statement is built from the ground up to improve the reliability and 
user-friendliness of this operation.
+      <codeph>COMPUTE STATS</codeph> does not require any setup steps or 
special configuration. You only run a
+      single Impala <codeph>COMPUTE STATS</codeph> statement to gather both 
table and column statistics, rather
+      than separate Hive <codeph>ANALYZE TABLE</codeph> statements for each 
kind of statistics.
+    </p>
+
+    <p rev="2.1.0">
+      The <codeph>COMPUTE INCREMENTAL STATS</codeph> variation is a shortcut 
for partitioned tables that works on a
+      subset of partitions rather than the entire table. The incremental 
nature makes it suitable for large tables
+      with many partitions, where a full <codeph>COMPUTE STATS</codeph> 
operation takes too long to be practical
+      each time a partition is added or dropped. See <xref 
href="impala_perf_stats.xml#perf_stats_incremental"/>
+      for full usage details.
+    </p>
+
+    <p>
+      <codeph>COMPUTE INCREMENTAL STATS</codeph> only applies to partitioned 
tables. If you use the
+      <codeph>INCREMENTAL</codeph> clause for an unpartitioned table, Impala 
automatically uses the original
+      <codeph>COMPUTE STATS</codeph> statement. Such tables display 
<codeph>false</codeph> under the
+      <codeph>Incremental stats</codeph> column of the <codeph>SHOW TABLE 
STATS</codeph> output.
+    </p>
+
+    <note>
+      Because many of the most performance-critical and resource-intensive 
operations rely on table and column
+      statistics to construct accurate and efficient plans, <codeph>COMPUTE 
STATS</codeph> is an important step at
+      the end of your ETL process. Run <codeph>COMPUTE STATS</codeph> on all 
tables as your first step during
+      performance tuning for slow queries, or troubleshooting for 
out-of-memory conditions:
+      <ul>
+        <li>
+          Accurate statistics help Impala construct an efficient query plan 
for join queries, improving performance
+          and reducing memory usage.
+        </li>
+
+        <li>
+          Accurate statistics help Impala distribute the work effectively for 
insert operations into Parquet
+          tables, improving performance and reducing memory usage.
+        </li>
+
+        <li rev="1.3.0">
+          Accurate statistics help Impala estimate the memory required for 
each query, which is important when you
+          use resource management features, such as admission control and the 
YARN resource management framework.
+          The statistics help Impala to achieve high concurrency, full 
utilization of available memory, and avoid
+          contention with workloads from other Hadoop components.
+        </li>
+      </ul>
+    </note>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+    <p rev="2.3.0">
+      Currently, the statistics created by the <codeph>COMPUTE STATS</codeph> 
statement do not include
+      information about complex type columns. The column stats metrics for 
complex columns are always shown
+      as -1. For queries involving complex type columns, Impala uses
+      heuristics to estimate the data distribution within such columns.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/hbase_blurb"/>
+
+    <p>
+      <codeph>COMPUTE STATS</codeph> works for HBase tables also. The 
statistics gathered for HBase tables are
+      somewhat different than for HDFS-backed tables, but that metadata is 
still used for optimization when HBase
+      tables are involved in join queries.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/s3_blurb"/>
+
+    <p rev="2.2.0">
+      <codeph>COMPUTE STATS</codeph> also works for tables where data resides 
in the Amazon Simple Storage Service (S3).
+      See <xref href="impala_s3.xml#s3"/> for details.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/performance_blurb"/>
+
+    <p>
+      The statistics collected by <codeph>COMPUTE STATS</codeph> are used to 
optimize join queries
+      <codeph>INSERT</codeph> operations into Parquet tables, and other 
resource-intensive kinds of SQL statements.
+      See <xref href="impala_perf_stats.xml#perf_stats"/> for details.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      This example shows two tables, <codeph>T1</codeph> and 
<codeph>T2</codeph>, with a small number distinct
+      values linked by a parent-child relationship between 
<codeph>T1.ID</codeph> and <codeph>T2.PARENT</codeph>.
+      <codeph>T1</codeph> is tiny, while <codeph>T2</codeph> has approximately 
100K rows. Initially, the statistics
+      includes physical measurements such as the number of files, the total 
size, and size measurements for
+      fixed-length columns such as with the <codeph>INT</codeph> type. Unknown 
values are represented by -1. After
+      running <codeph>COMPUTE STATS</codeph> for each table, much more 
information is available through the
+      <codeph>SHOW STATS</codeph> statements. If you were running a join query 
involving both of these tables, you
+      would need statistics for both tables to get the most effective 
optimization for the query.
+    </p>
+
+<!-- Note: chopped off any excess characters at position 87 and after,
+           to avoid weird wrapping in PDF.
+           Applies to any subsequent examples with output from SHOW ... STATS 
too. -->
+
+<codeblock>[localhost:21000] &gt; show table stats t1;
+Query: show table stats t1
++-------+--------+------+--------+
+| #Rows | #Files | Size | Format |
++-------+--------+------+--------+
+| -1    | 1      | 33B  | TEXT   |
++-------+--------+------+--------+
+Returned 1 row(s) in 0.02s
+[localhost:21000] &gt; show table stats t2;
+Query: show table stats t2
++-------+--------+----------+--------+
+| #Rows | #Files | Size     | Format |
++-------+--------+----------+--------+
+| -1    | 28     | 960.00KB | TEXT   |
++-------+--------+----------+--------+
+Returned 1 row(s) in 0.01s
+[localhost:21000] &gt; show column stats t1;
+Query: show column stats t1
++--------+--------+------------------+--------+----------+----------+
+| Column | Type   | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+--------+------------------+--------+----------+----------+
+| id     | INT    | -1               | -1     | 4        | 4        |
+| s      | STRING | -1               | -1     | -1       | -1       |
++--------+--------+------------------+--------+----------+----------+
+Returned 2 row(s) in 1.71s
+[localhost:21000] &gt; show column stats t2;
+Query: show column stats t2
++--------+--------+------------------+--------+----------+----------+
+| Column | Type   | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+--------+------------------+--------+----------+----------+
+| parent | INT    | -1               | -1     | 4        | 4        |
+| s      | STRING | -1               | -1     | -1       | -1       |
++--------+--------+------------------+--------+----------+----------+
+Returned 2 row(s) in 0.01s
+[localhost:21000] &gt; compute stats t1;
+Query: compute stats t1
++-----------------------------------------+
+| summary                                 |
++-----------------------------------------+
+| Updated 1 partition(s) and 2 column(s). |
++-----------------------------------------+
+Returned 1 row(s) in 5.30s
+[localhost:21000] &gt; show table stats t1;
+Query: show table stats t1
++-------+--------+------+--------+
+| #Rows | #Files | Size | Format |
++-------+--------+------+--------+
+| 3     | 1      | 33B  | TEXT   |
++-------+--------+------+--------+
+Returned 1 row(s) in 0.01s
+[localhost:21000] &gt; show column stats t1;
+Query: show column stats t1
++--------+--------+------------------+--------+----------+----------+
+| Column | Type   | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+--------+------------------+--------+----------+----------+
+| id     | INT    | 3                | -1     | 4        | 4        |
+| s      | STRING | 3                | -1     | -1       | -1       |
++--------+--------+------------------+--------+----------+----------+
+Returned 2 row(s) in 0.02s
+[localhost:21000] &gt; compute stats t2;
+Query: compute stats t2
++-----------------------------------------+
+| summary                                 |
++-----------------------------------------+
+| Updated 1 partition(s) and 2 column(s). |
++-----------------------------------------+
+Returned 1 row(s) in 5.70s
+[localhost:21000] &gt; show table stats t2;
+Query: show table stats t2
++-------+--------+----------+--------+
+| #Rows | #Files | Size     | Format |
++-------+--------+----------+--------+
+| 98304 | 1      | 960.00KB | TEXT   |
++-------+--------+----------+--------+
+Returned 1 row(s) in 0.03s
+[localhost:21000] &gt; show column stats t2;
+Query: show column stats t2
++--------+--------+------------------+--------+----------+----------+
+| Column | Type   | #Distinct Values | #Nulls | Max Size | Avg Size |
++--------+--------+------------------+--------+----------+----------+
+| parent | INT    | 3                | -1     | 4        | 4        |
+| s      | STRING | 6                | -1     | 14       | 9.3      |
++--------+--------+------------------+--------+----------+----------+
+Returned 2 row(s) in 0.01s</codeblock>
+
+    <p rev="2.1.0">
+      The following example shows how to use the <codeph>INCREMENTAL</codeph> 
clause, available in Impala 2.1.0 and
+      higher. The <codeph>COMPUTE INCREMENTAL STATS</codeph> syntax lets you 
collect statistics for newly added or
+      changed partitions, without rescanning the entire table.
+    </p>
+
+<codeblock>-- Initially the table has no incremental stats, as indicated
+-- by -1 under #Rows and false under Incremental stats.
+show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category  | #Rows | #Files | Size     | Bytes Cached | Format  | 
Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books       | -1    | 1      | 223.74KB | NOT CACHED   | PARQUET | false
+| Children    | -1    | 1      | 230.05KB | NOT CACHED   | PARQUET | false
+| Electronics | -1    | 1      | 232.67KB | NOT CACHED   | PARQUET | false
+| Home        | -1    | 1      | 232.56KB | NOT CACHED   | PARQUET | false
+| Jewelry     | -1    | 1      | 223.72KB | NOT CACHED   | PARQUET | false
+| Men         | -1    | 1      | 231.25KB | NOT CACHED   | PARQUET | false
+| Music       | -1    | 1      | 237.90KB | NOT CACHED   | PARQUET | false
+| Shoes       | -1    | 1      | 234.90KB | NOT CACHED   | PARQUET | false
+| Sports      | -1    | 1      | 227.97KB | NOT CACHED   | PARQUET | false
+| Women       | -1    | 1      | 226.27KB | NOT CACHED   | PARQUET | false
+| Total       | -1    | 10     | 2.25MB   | 0B           |         |
++-------------+-------+--------+----------+--------------+---------+------------------
+
+-- After the first COMPUTE INCREMENTAL STATS,
+-- all partitions have stats.
+compute incremental stats item_partitioned;
++-------------------------------------------+
+| summary                                   |
++-------------------------------------------+
+| Updated 10 partition(s) and 21 column(s). |
++-------------------------------------------+
+show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category  | #Rows | #Files | Size     | Bytes Cached | Format  | 
Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books       | 1733  | 1      | 223.74KB | NOT CACHED   | PARQUET | true
+| Children    | 1786  | 1      | 230.05KB | NOT CACHED   | PARQUET | true
+| Electronics | 1812  | 1      | 232.67KB | NOT CACHED   | PARQUET | true
+| Home        | 1807  | 1      | 232.56KB | NOT CACHED   | PARQUET | true
+| Jewelry     | 1740  | 1      | 223.72KB | NOT CACHED   | PARQUET | true
+| Men         | 1811  | 1      | 231.25KB | NOT CACHED   | PARQUET | true
+| Music       | 1860  | 1      | 237.90KB | NOT CACHED   | PARQUET | true
+| Shoes       | 1835  | 1      | 234.90KB | NOT CACHED   | PARQUET | true
+| Sports      | 1783  | 1      | 227.97KB | NOT CACHED   | PARQUET | true
+| Women       | 1790  | 1      | 226.27KB | NOT CACHED   | PARQUET | true
+| Total       | 17957 | 10     | 2.25MB   | 0B           |         |
++-------------+-------+--------+----------+--------------+---------+------------------
+
+-- Add a new partition...
+alter table item_partitioned add partition (i_category='Camping');
+-- Add or replace files in HDFS outside of Impala,
+-- rendering the stats for a partition obsolete.
+!import_data_into_sports_partition.sh
+refresh item_partitioned;
+drop incremental stats item_partitioned partition (i_category='Sports');
+-- Now some partitions have incremental stats
+-- and some don't.
+show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category  | #Rows | #Files | Size     | Bytes Cached | Format  | 
Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books       | 1733  | 1      | 223.74KB | NOT CACHED   | PARQUET | true
+| Camping     | -1    | 1      | 408.02KB | NOT CACHED   | PARQUET | false
+| Children    | 1786  | 1      | 230.05KB | NOT CACHED   | PARQUET | true
+| Electronics | 1812  | 1      | 232.67KB | NOT CACHED   | PARQUET | true
+| Home        | 1807  | 1      | 232.56KB | NOT CACHED   | PARQUET | true
+| Jewelry     | 1740  | 1      | 223.72KB | NOT CACHED   | PARQUET | true
+| Men         | 1811  | 1      | 231.25KB | NOT CACHED   | PARQUET | true
+| Music       | 1860  | 1      | 237.90KB | NOT CACHED   | PARQUET | true
+| Shoes       | 1835  | 1      | 234.90KB | NOT CACHED   | PARQUET | true
+| Sports      | -1    | 1      | 227.97KB | NOT CACHED   | PARQUET | false
+| Women       | 1790  | 1      | 226.27KB | NOT CACHED   | PARQUET | true
+| Total       | 17957 | 11     | 2.65MB   | 0B           |         |
++-------------+-------+--------+----------+--------------+---------+------------------
+
+-- After another COMPUTE INCREMENTAL STATS,
+-- all partitions have incremental stats, and only the 2
+-- partitions without incremental stats were scanned.
+compute incremental stats item_partitioned;
++------------------------------------------+
+| summary                                  |
++------------------------------------------+
+| Updated 2 partition(s) and 21 column(s). |
++------------------------------------------+
+show table stats item_partitioned;
++-------------+-------+--------+----------+--------------+---------+------------------
+| i_category  | #Rows | #Files | Size     | Bytes Cached | Format  | 
Incremental stats
++-------------+-------+--------+----------+--------------+---------+------------------
+| Books       | 1733  | 1      | 223.74KB | NOT CACHED   | PARQUET | true
+| Camping     | 5328  | 1      | 408.02KB | NOT CACHED   | PARQUET | true
+| Children    | 1786  | 1      | 230.05KB | NOT CACHED   | PARQUET | true
+| Electronics | 1812  | 1      | 232.67KB | NOT CACHED   | PARQUET | true
+| Home        | 1807  | 1      | 232.56KB | NOT CACHED   | PARQUET | true
+| Jewelry     | 1740  | 1      | 223.72KB | NOT CACHED   | PARQUET | true
+| Men         | 1811  | 1      | 231.25KB | NOT CACHED   | PARQUET | true
+| Music       | 1860  | 1      | 237.90KB | NOT CACHED   | PARQUET | true
+| Shoes       | 1835  | 1      | 234.90KB | NOT CACHED   | PARQUET | true
+| Sports      | 1783  | 1      | 227.97KB | NOT CACHED   | PARQUET | true
+| Women       | 1790  | 1      | 226.27KB | NOT CACHED   | PARQUET | true
+| Total       | 17957 | 11     | 2.65MB   | 0B           |         |
++-------------+-------+--------+----------+--------------+---------+------------------
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/file_format_blurb"/>
+
+    <p>
+      The <codeph>COMPUTE STATS</codeph> statement works with tables created 
with any of the file formats supported
+      by Impala. See <xref href="impala_file_formats.xml#file_formats"/> for 
details about working with the
+      different file formats. The following considerations apply to 
<codeph>COMPUTE STATS</codeph> depending on the
+      file format of the table.
+    </p>
+
+    <p>
+      The <codeph>COMPUTE STATS</codeph> statement works with text tables with 
no restrictions. These tables can be
+      created through either Impala or Hive.
+    </p>
+
+    <p>
+      The <codeph>COMPUTE STATS</codeph> statement works with Parquet tables. 
These tables can be created through
+      either Impala or Hive.
+      <note conref="../shared/impala_common.xml#common/compute_stats_parquet"/>
+    </p>
+
+    <p>
+      The <codeph>COMPUTE STATS</codeph> statement works with Avro tables, as 
long as they are created with
+      SQL-style column names and types rather than an Avro-style schema 
specification. These tables are currently
+      always created through Hive rather than Impala.
+    </p>
+
+    <p>
+      The <codeph>COMPUTE STATS</codeph> statement works with RCFile tables 
with no restrictions. These tables can
+      be created through either Impala or Hive.
+    </p>
+
+    <p>
+      The <codeph>COMPUTE STATS</codeph> statement works with SequenceFile 
tables with no restrictions. These
+      tables can be created through either Impala or Hive.
+    </p>
+
+    <p>
+      The <codeph>COMPUTE STATS</codeph> statement works with partitioned 
tables, whether all the partitions use
+      the same file format, or some partitions are defined through 
<codeph>ALTER TABLE</codeph> to use different
+      file formats.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/cancel_blurb_maybe"/>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/decimal_no_stats"/>
+
+    <note conref="../shared/impala_common.xml#common/compute_stats_nulls"/>
+
+    <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+    <p>
+      Behind the scenes, the <codeph>COMPUTE STATS</codeph> statement
+      executes two statements: one to count the rows of each partition
+      in the table (or the entire table if unpartitioned) through the
+      <codeph>COUNT(*)</codeph> function,
+      and another to count the approximate number of distinct values
+      in each column through the <codeph>NDV()</codeph> function.
+      You might see these queries in your monitoring and diagnostic displays.
+      The same factors that affect the performance, scalability, and
+      execution of other queries (such as parallel execution, memory usage,
+      admission control, and timeouts) also apply to the queries run by the
+      <codeph>COMPUTE STATS</codeph> statement.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+    <p rev="CDH-19187">
+      The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+      typically the <codeph>impala</codeph> user, must have read
+      permission for all affected files in the source directory:
+      all files in the case of an unpartitioned table or
+      a partitioned table in the case of <codeph>COMPUTE STATS</codeph>;
+      or all the files in partitions without incremental stats in
+      the case of <codeph>COMPUTE INCREMENTAL STATS</codeph>.
+      It must also have read and execute permissions for all
+      relevant directories holding the data files.
+      (Essentially, <codeph>COMPUTE STATS</codeph> requires the
+      same permissions as the underlying <codeph>SELECT</codeph> queries it 
runs
+      against the table.)
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_drop_stats.xml#drop_stats"/>, <xref 
href="impala_show.xml#show_table_stats"/>,
+      <xref href="impala_show.xml#show_column_stats"/>, <xref 
href="impala_perf_stats.xml#perf_stats"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_conditional_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_conditional_functions.xml 
b/docs/topics/impala_conditional_functions.xml
new file mode 100644
index 0000000..b922710
--- /dev/null
+++ b/docs/topics/impala_conditional_functions.xml
@@ -0,0 +1,443 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD 
DITA Concept//EN" "concept.dtd">
+<concept id="conditional_functions">
+
+  <title>Impala Conditional Functions</title>
+  <titlealts><navtitle>Conditional Functions</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Querying"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Impala supports the following conditional functions for testing 
equality, comparison operators, and nullity:
+    </p>
+
+    <dl>
+      <dlentry id="case">
+
+        <dt>
+          <codeph>CASE a WHEN b THEN c [WHEN d THEN e]... [ELSE f] END</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">CASE expression</indexterm>
+          <b>Purpose:</b> Compares an expression to one or more possible 
values, and returns a corresponding result
+          when a match is found.
+          <p conref="../shared/impala_common.xml#common/return_same_type"/>
+          <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+          <p>
+            In this form of the <codeph>CASE</codeph> expression, the initial 
value <codeph>A</codeph>
+            being evaluated for each row it typically a column reference, or 
an expression involving
+            a column. This form can only compare against a set of specified 
values, not ranges,
+            multi-value comparisons such as <codeph>BETWEEN</codeph> or 
<codeph>IN</codeph>,
+            regular expressions, or <codeph>NULL</codeph>.
+          </p>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p>
+            Although this example is split across multiple lines, you can put 
any or all parts of a <codeph>CASE</codeph> expression
+            on a single line, with no punctuation or other separators between 
the <codeph>WHEN</codeph>,
+            <codeph>ELSE</codeph>, and <codeph>END</codeph> clauses.
+          </p>
+<codeblock>select case x
+    when 1 then 'one'
+    when 2 then 'two'
+    when 0 then 'zero'
+    else 'out of range'
+  end
+    from t1;
+</codeblock>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="case2">
+
+        <dt>
+          <codeph>CASE WHEN a THEN b [WHEN c THEN d]... [ELSE e] END</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">CASE expression</indexterm>
+          <b>Purpose:</b> Tests whether any of a sequence of expressions is 
true, and returns a corresponding
+          result for the first true expression.
+          <p conref="../shared/impala_common.xml#common/return_same_type"/>
+          <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+          <p>
+            <codeph>CASE</codeph> expressions without an initial test value 
have more flexibility.
+            For example, they can test different columns in different 
<codeph>WHEN</codeph> clauses,
+            or use comparison operators such as <codeph>BETWEEN</codeph>, 
<codeph>IN</codeph> and <codeph>IS NULL</codeph>
+            rather than comparing against discrete values.
+          </p>
+          <p>
+            <codeph>CASE</codeph> expressions are often the foundation of long 
queries that
+            summarize and format results for easy-to-read reports. For 
example, you might
+            use a <codeph>CASE</codeph> function call to turn values from a 
numeric column
+            into category strings corresponding to integer values, or labels 
such as <q>Small</q>,
+            <q>Medium</q> and <q>Large</q> based on ranges. Then subsequent 
parts of the
+            query might aggregate based on the transformed values, such as how 
many
+            values are classified as small, medium, or large. You can also use 
<codeph>CASE</codeph>
+            to signal problems with out-of-bounds values, 
<codeph>NULL</codeph> values,
+            and so on.
+          </p>
+          <p>
+            By using operators such as <codeph>OR</codeph>, 
<codeph>IN</codeph>,
+            <codeph>REGEXP</codeph>, and so on in <codeph>CASE</codeph> 
expressions,
+            you can build extensive tests and transformations into a single 
query.
+            Therefore, applications that construct SQL statements often rely 
heavily on <codeph>CASE</codeph>
+            calls in the generated SQL code.
+          </p>
+          <p>
+            Because this flexible form of the <codeph>CASE</codeph> 
expressions allows you to perform
+            many comparisons and call multiple functions when evaluating each 
row, be careful applying
+            elaborate <codeph>CASE</codeph> expressions to queries that 
process large amounts of data.
+            For example, when practical, evaluate and transform values through 
<codeph>CASE</codeph>
+            after applying operations such as aggregations that reduce the 
size of the result set;
+            transform numbers to strings after performing joins with the 
original numeric values.
+          </p>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p>
+            Although this example is split across multiple lines, you can put 
any or all parts of a <codeph>CASE</codeph> expression
+            on a single line, with no punctuation or other separators between 
the <codeph>WHEN</codeph>,
+            <codeph>ELSE</codeph>, and <codeph>END</codeph> clauses.
+          </p>
+<codeblock>select case
+    when dayname(now()) in ('Saturday','Sunday') then 'result undefined on 
weekends'
+    when x > y then 'x greater than y'
+    when x = y then 'x and y are equal'
+    when x is null or y is null then 'one of the columns is null'
+    else null
+  end
+    from t1;
+</codeblock>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="coalesce">
+
+        <dt>
+          <codeph>coalesce(type v1, type v2, ...)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">coalesce() function</indexterm>
+          <b>Purpose:</b> Returns the first specified argument that is not 
<codeph>NULL</codeph>, or
+          <codeph>NULL</codeph> if all arguments are <codeph>NULL</codeph>.
+          <p conref="../shared/impala_common.xml#common/return_same_type"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="2.0.0" id="decode">
+
+        <dt>
+          <codeph>decode(type expression, type search1, type result1 [, type 
search2, type result2 ...] [, type
+          default] )</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">decode() function</indexterm>
+          <b>Purpose:</b> Compares an expression to one or more possible 
values, and returns a corresponding result
+          when a match is found.
+          <p conref="../shared/impala_common.xml#common/return_same_type"/>
+          <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+          <p>
+            Can be used as shorthand for a <codeph>CASE</codeph> expression.
+          </p>
+          <p>
+            The original expression and the search expressions must of the 
same type or convertible types. The
+            result expression can be a different type, but all result 
expressions must be of the same type.
+          </p>
+          <p>
+            Returns a successful match If the original expression is 
<codeph>NULL</codeph> and a search expression
+            is also <codeph>NULL</codeph>. the
+          </p>
+          <p>
+            Returns <codeph>NULL</codeph> if the final 
<codeph>default</codeph> value is omitted and none of the
+            search expressions match the original expression.
+          </p>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p>
+            The following example translates numeric day values into 
descriptive names:
+          </p>
+<codeblock>SELECT event, decode(day_of_week, 1, "Monday", 2, "Tuesday", 3, 
"Wednesday",
+  4, "Thursday", 5, "Friday", 6, "Saturday", 7, "Sunday", "Unknown day")
+  FROM calendar;
+</codeblock>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="if">
+
+        <dt>
+          <codeph>if(boolean condition, type ifTrue, type 
ifFalseOrNull)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">if() function</indexterm>
+          <b>Purpose:</b> Tests an expression and returns a corresponding 
result depending on whether the result is
+          true, false, or <codeph>NULL</codeph>.
+          <p>
+            <b>Return type:</b> Same as the <codeph>ifTrue</codeph> argument 
value
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="1.3.0" id="ifnull">
+
+        <dt>
+          <codeph>ifnull(type a, type ifNotNull)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">isnull() function</indexterm>
+          <b>Purpose:</b> Alias for the <codeph>isnull()</codeph> function, 
with the same behavior. To simplify
+          porting SQL with vendor extensions to Impala.
+          <p conref="../shared/impala_common.xml#common/added_in_130"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="isfalse" rev="2.2.0">
+
+        <dt>
+          <codeph>isfalse(<varname>boolean</varname>)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">isfalse() function</indexterm>
+          <b>Purpose:</b> Tests if a Boolean expression is 
<codeph>false</codeph> or not.
+          Returns <codeph>true</codeph> if so.
+          If the argument is <codeph>NULL</codeph>, returns 
<codeph>false</codeph>.
+          Identical to <codeph>isnottrue()</codeph>, except it returns the 
opposite value for a <codeph>NULL</codeph> argument.
+          <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+          <p conref="../shared/impala_common.xml#common/added_in_220"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="isnotfalse" rev="2.2.0">
+
+        <dt>
+          <codeph>isnotfalse(<varname>boolean</varname>)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">isnotfalse() function</indexterm>
+          <b>Purpose:</b> Tests if a Boolean expression is not 
<codeph>false</codeph> (that is, either <codeph>true</codeph> or 
<codeph>NULL</codeph>).
+          Returns <codeph>true</codeph> if so.
+          If the argument is <codeph>NULL</codeph>, returns 
<codeph>true</codeph>.
+          Identical to <codeph>istrue()</codeph>, except it returns the 
opposite value for a <codeph>NULL</codeph> argument.
+          <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+          <p 
conref="../shared/impala_common.xml#common/for_compatibility_only"/>
+          <p conref="../shared/impala_common.xml#common/added_in_220"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="isnottrue" rev="2.2.0">
+
+        <dt>
+          <codeph>isnottrue(<varname>boolean</varname>)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">isnottrue() function</indexterm>
+          <b>Purpose:</b> Tests if a Boolean expression is not 
<codeph>true</codeph> (that is, either <codeph>false</codeph> or 
<codeph>NULL</codeph>).
+          Returns <codeph>true</codeph> if so.
+          If the argument is <codeph>NULL</codeph>, returns 
<codeph>true</codeph>.
+          Identical to <codeph>isfalse()</codeph>, except it returns the 
opposite value for a <codeph>NULL</codeph> argument.
+          <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+          <p conref="../shared/impala_common.xml#common/added_in_220"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="isnull">
+
+        <dt>
+          <codeph>isnull(type a, type ifNotNull)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">isnull() function</indexterm>
+          <b>Purpose:</b> Tests if an expression is <codeph>NULL</codeph>, and 
returns the expression result value
+          if not. If the first argument is <codeph>NULL</codeph>, returns the 
second argument.
+          <p>
+            <b>Compatibility notes:</b> Equivalent to the 
<codeph>nvl()</codeph> function from Oracle Database or
+            <codeph>ifnull()</codeph> from MySQL. The <codeph>nvl()</codeph> 
and <codeph>ifnull()</codeph>
+            functions are also available in Impala.
+          </p>
+          <p>
+            <b>Return type:</b> Same as the first argument value
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="istrue" rev="2.2.0">
+
+        <dt>
+          <codeph>istrue(<varname>boolean</varname>)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">istrue() function</indexterm>
+          <b>Purpose:</b> Tests if a Boolean expression is 
<codeph>true</codeph> or not.
+          Returns <codeph>true</codeph> if so.
+          If the argument is <codeph>NULL</codeph>, returns 
<codeph>false</codeph>.
+          Identical to <codeph>isnotfalse()</codeph>, except it returns the 
opposite value for a <codeph>NULL</codeph> argument.
+          <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+          <p 
conref="../shared/impala_common.xml#common/for_compatibility_only"/>
+          <p conref="../shared/impala_common.xml#common/added_in_220"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="notnullvalue" rev="2.2.0">
+
+        <dt>
+          <codeph>notnullvalue(<varname>expression</varname>)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">function</indexterm>
+          <b>Purpose:</b> Tests if an expression (of any type) is 
<codeph>NULL</codeph> or not.
+          Returns <codeph>false</codeph> if so.
+          The converse of <codeph>nullvalue()</codeph>.
+          <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+          <p 
conref="../shared/impala_common.xml#common/for_compatibility_only"/>
+          <p conref="../shared/impala_common.xml#common/added_in_220"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="1.3.0" id="nullif">
+
+        <dt>
+          
<codeph>nullif(<varname>expr1</varname>,<varname>expr2</varname>)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">nullif() function</indexterm>
+          <b>Purpose:</b> Returns <codeph>NULL</codeph> if the two specified 
arguments are equal. If the specified
+          arguments are not equal, returns the value of 
<varname>expr1</varname>. The data types of the expressions
+          must be compatible, according to the conversion rules from <xref 
href="impala_datatypes.xml#datatypes"/>.
+          You cannot use an expression that evaluates to <codeph>NULL</codeph> 
for <varname>expr1</varname>; that
+          way, you can distinguish a return value of <codeph>NULL</codeph> 
from an argument value of
+          <codeph>NULL</codeph>, which would never match 
<varname>expr2</varname>.
+          <p>
+            <b>Usage notes:</b> This function is effectively shorthand for a 
<codeph>CASE</codeph> expression of
+            the form:
+          </p>
+<codeblock>CASE
+  WHEN <varname>expr1</varname> = <varname>expr2</varname> THEN NULL
+  ELSE <varname>expr1</varname>
+END</codeblock>
+          <p>
+            It is commonly used in division expressions, to produce a 
<codeph>NULL</codeph> result instead of a
+            divide-by-zero error when the divisor is equal to zero:
+          </p>
+<codeblock>select 1.0 / nullif(c1,0) as reciprocal from t1;</codeblock>
+          <p>
+            You might also use it for compatibility with other database 
systems that support the same
+            <codeph>NULLIF()</codeph> function.
+          </p>
+          <p conref="../shared/impala_common.xml#common/return_same_type"/>
+          <p conref="../shared/impala_common.xml#common/added_in_130"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="1.3.0" id="nullifzero">
+
+        <dt>
+          <codeph>nullifzero(<varname>numeric_expr</varname>)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">nullifzero() function</indexterm>
+          <b>Purpose:</b> Returns <codeph>NULL</codeph> if the numeric 
expression evaluates to 0, otherwise returns
+          the result of the expression.
+          <p>
+            <b>Usage notes:</b> Used to avoid error conditions such as 
divide-by-zero in numeric calculations.
+            Serves as shorthand for a more elaborate <codeph>CASE</codeph> 
expression, to simplify porting SQL with
+            vendor extensions to Impala.
+          </p>
+          <p conref="../shared/impala_common.xml#common/return_same_type"/>
+          <p conref="../shared/impala_common.xml#common/added_in_130"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="nullvalue" rev="2.2.0">
+
+        <dt>
+          <codeph>nullvalue(<varname>expression</varname>)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">function</indexterm>
+          <b>Purpose:</b> Tests if an expression (of any type) is 
<codeph>NULL</codeph> or not.
+          Returns <codeph>true</codeph> if so.
+          The converse of <codeph>notnullvalue()</codeph>.
+          <p conref="../shared/impala_common.xml#common/return_type_boolean"/>
+          <p 
conref="../shared/impala_common.xml#common/for_compatibility_only"/>
+          <p conref="../shared/impala_common.xml#common/added_in_220"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="nvl" rev="1.1">
+
+        <dt>
+          <codeph>nvl(type a, type ifNotNull)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">nvl() function</indexterm>
+          <b>Purpose:</b> Alias for the <codeph>isnull()</codeph> function. 
Tests if an expression is
+          <codeph>NULL</codeph>, and returns the expression result value if 
not. If the first argument is
+          <codeph>NULL</codeph>, returns the second argument. Equivalent to 
the <codeph>nvl()</codeph> function
+          from Oracle Database or <codeph>ifnull()</codeph> from MySQL.
+          <p>
+            <b>Return type:</b> Same as the first argument value
+          </p>
+          <p conref="../shared/impala_common.xml#common/added_in_11"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="1.3.0" id="zeroifnull">
+
+        <dt>
+          <codeph>zeroifnull(<varname>numeric_expr</varname>)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">zeroifnull() function</indexterm>
+          <b>Purpose:</b> Returns 0 if the numeric expression evaluates to 
<codeph>NULL</codeph>, otherwise returns
+          the result of the expression.
+          <p>
+            <b>Usage notes:</b> Used to avoid unexpected results due to 
unexpected propagation of
+            <codeph>NULL</codeph> values in numeric calculations. Serves as 
shorthand for a more elaborate
+            <codeph>CASE</codeph> expression, to simplify porting SQL with 
vendor extensions to Impala.
+          </p>
+          <p conref="../shared/impala_common.xml#common/return_same_type"/>
+          <p conref="../shared/impala_common.xml#common/added_in_130"/>
+        </dd>
+
+      </dlentry>
+    </dl>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_conversion_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_conversion_functions.xml 
b/docs/topics/impala_conversion_functions.xml
new file mode 100644
index 0000000..1050d0c
--- /dev/null
+++ b/docs/topics/impala_conversion_functions.xml
@@ -0,0 +1,758 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD 
DITA Concept//EN" "concept.dtd">
+<concept id="conversion_functions">
+
+  <title>Impala Type Conversion Functions</title>
+  <titlealts><navtitle>Type Conversion Functions</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Querying"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Conversion functions are usually used in combination with other 
functions, to explicitly pass the expected
+      data types. Impala has strict rules regarding data types for function 
parameters. For example, Impala does
+      not automatically convert a <codeph>DOUBLE</codeph> value to 
<codeph>FLOAT</codeph>, a
+      <codeph>BIGINT</codeph> value to <codeph>INT</codeph>, or other 
conversion where precision could be lost or
+      overflow could occur. Also, for reporting or dealing with loosely 
defined schemas in big data contexts,
+      you might frequently need to convert values to or from the 
<codeph>STRING</codeph> type.
+    </p>
+      
+    <note>
+      Although in CDH 5.5.0, the <codeph>SHOW FUNCTIONS</codeph> output for
+      database <codeph>_IMPALA_BUILTINS</codeph> contains some function 
signatures
+      matching the pattern <codeph>castto*</codeph>, these functions are not 
intended
+      for public use and are expected to be hidden in future.
+    </note>
+
+    <p>
+      <b>Function reference:</b>
+    </p>
+
+    <p>
+      Impala supports the following type conversion functions:
+    </p>
+
+<dl>
+
+<dlentry id="cast">
+<dt>
+<codeph>cast(<varname>expr</varname> AS <varname>type</varname>)</codeph>
+</dt>
+
+<dd>
+<indexterm audience="Cloudera">cast() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to any other type.
+If the expression value is of a type that cannot be converted to the target 
type, the result is <codeph>NULL</codeph>.
+<p><b>Usage notes:</b>
+Use <codeph>CAST</codeph> when passing a column value or literal to a function 
that
+expects a parameter with a different type.
+Frequently used in SQL operations such as <codeph>CREATE TABLE AS 
SELECT</codeph>
+and <codeph>INSERT ... VALUES</codeph> to ensure that values from various 
sources
+are of the appropriate type for the destination columns.
+Where practical, do a one-time <codeph>CAST()</codeph> operation during the 
ingestion process
+to make each column into the appropriate type, rather than using many 
<codeph>CAST()</codeph>
+operations in each query; doing type conversions for each row during each 
query can be expensive
+for tables with millions or billions of rows.
+</p>
+    <p conref="../shared/impala_common.xml#common/timezone_conversion_caveat"/>
+
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>select concat('Here are the first ',10,' results.'); -- Fails
+select concat('Here are the first ',cast(10 as string),' results.'); -- 
Succeeds
+</codeblock>
+<p>
+The following example starts with a text table where every column has a type 
of <codeph>STRING</codeph>,
+which might be how you ingest data of unknown schema until you can verify the 
cleanliness of the underly values.
+Then it uses <codeph>CAST()</codeph> to create a new Parquet table with the 
same data, but using specific
+numeric data types for the columns with numeric data. Using numeric types of 
appropriate sizes can result in
+substantial space savings on disk and in memory, and performance improvements 
in queries,
+over using strings or larger-than-necessary numeric types.
+</p>
+<codeblock>create table t1 (name string, x string, y string, z string);
+
+create table t2 stored as parquet
+as select
+  name,
+  cast(x as bigint) x,
+  cast(y as timestamp) y,
+  cast(z as smallint) z
+from t1;
+
+describe t2;
++------+----------+---------+
+| name | type     | comment |
++------+----------+---------+
+| name | string   |         |
+| x    | bigint   |         |
+| y    | smallint |         |
+| z    | tinyint  |         |
++------+----------+---------+
+</codeblock>
+<p conref="../shared/impala_common.xml#common/related_info"/>
+<p>
+<!-- TK: Can you cast to or from MAP, ARRAY, STRUCT? -->
+  For details of casts from each kind of data type, see the description of
+  the appropriate type:
+  <xref href="impala_tinyint.xml#tinyint"/>,
+  <xref href="impala_smallint.xml#smallint"/>,
+  <xref href="impala_int.xml#int"/>,
+  <xref href="impala_bigint.xml#bigint"/>,
+  <xref href="impala_float.xml#float"/>,
+  <xref href="impala_double.xml#double"/>,
+  <xref href="impala_decimal.xml#decimal"/>,
+  <xref href="impala_string.xml#string"/>,
+  <xref href="impala_char.xml#char"/>,
+  <xref href="impala_varchar.xml#varchar"/>,
+  <xref href="impala_timestamp.xml#timestamp"/>,
+  <xref href="impala_boolean.xml#boolean"/>
+</p>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttobigint" audience="Cloudera">
+<dt>
+<codeph>casttobigint(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttobigint() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to 
<codeph>BIGINT</codeph>. If the expression value is of a type that cannot be 
converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>bigint</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table small_types (x tinyint, y smallint, z int);
+
+create table big_types as
+  select casttobigint(x) as x, casttobigint(y) as y, casttobigint(z) as z
+    from small_types;
+
+describe big_types;
++------+--------+---------+
+| name | type   | comment |
++------+--------+---------+
+| x    | bigint |         |
+| y    | bigint |         |
+| z    | bigint |         |
++------+--------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttoboolean" audience="Cloudera">
+<dt>
+<codeph>casttoboolean(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttoboolean() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to 
<codeph>BOOLEAN</codeph>.
+Numeric values of 0 evaluate to <codeph>false</codeph>, and non-zero values 
evaluate to <codeph>true</codeph>.
+If the expression value is of a type that cannot be converted to the target 
type, the result is <codeph>NULL</codeph>.
+In particular, <codeph>STRING</codeph> values (even <codeph>'1'</codeph>, 
<codeph>'0'</codeph>, <codeph>'true'</codeph>
+or <codeph>'false'</codeph>) always return <codeph>NULL</codeph> when 
converted to <codeph>BOOLEAN</codeph>.
+<p><b>Return type:</b> <codeph>boolean</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttoboolean(0);
++------------------+
+| casttoboolean(0) |
++------------------+
+| false            |
++------------------+
+
+select casttoboolean(1);
++------------------+
+| casttoboolean(1) |
++------------------+
+| true             |
++------------------+
+
+select casttoboolean(99);
++-------------------+
+| casttoboolean(99) |
++-------------------+
+| true              |
++-------------------+
+
+select casttoboolean(0.0);
++--------------------+
+| casttoboolean(0.0) |
++--------------------+
+| false              |
++--------------------+
+
+select casttoboolean(0.5);
++--------------------+
+| casttoboolean(0.5) |
++--------------------+
+| true               |
++--------------------+
+
+select casttoboolean('');
++-------------------+
+| casttoboolean('') |
++-------------------+
+| NULL              |
++-------------------+
+
+select casttoboolean('yes');
++----------------------+
+| casttoboolean('yes') |
++----------------------+
+| NULL                 |
++----------------------+
+
+select casttoboolean('0');
++--------------------+
+| casttoboolean('0') |
++--------------------+
+| NULL               |
++--------------------+
+
+select casttoboolean('true');
++-----------------------+
+| casttoboolean('true') |
++-----------------------+
+| NULL                  |
++-----------------------+
+
+select casttoboolean('false');
++------------------------+
+| casttoboolean('false') |
++------------------------+
+| NULL                   |
++------------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttochar" audience="Cloudera">
+<dt>
+<codeph>casttochar(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttochar() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>CHAR</codeph>. 
If the expression value is of a type that cannot be converted to the target 
type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>char</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table char_types as select casttochar('hello world') as c1, 
casttochar('xyz') as c2, casttochar('x') as c3;
++-------------------+
+| summary           |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+
+describe char_types;
++------+--------+---------+
+| name | type   | comment |
++------+--------+---------+
+| c1   | string |         |
+| c2   | string |         |
+| c3   | string |         |
++------+--------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttodecimal" audience="Cloudera">
+<dt>
+<codeph>casttodecimal(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttodecimal() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to 
<codeph>DECIMAL</codeph>. If the expression value is of a type that cannot be 
converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>decimal</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttodecimal(5.4);
++--------------------+
+| casttodecimal(5.4) |
++--------------------+
+| 5.4                |
++--------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttodouble" audience="Cloudera">
+<dt>
+<codeph>casttodouble(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttodouble() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to 
<codeph>DOUBLE</codeph>. If the expression value is of a type that cannot be 
converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>double</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttodouble(5);
++-----------------+
+| casttodouble(5) |
++-----------------+
+| 5               |
++-----------------+
+
+select casttodouble('3.141');
++-----------------------+
+| casttodouble('3.141') |
++-----------------------+
+| 3.141                 |
++-----------------------+
+
+select casttodouble(1e6);
++--------------------+
+| casttodouble(1e+6) |
++--------------------+
+| 1000000            |
++--------------------+
+
+select casttodouble(true);
++--------------------+
+| casttodouble(true) |
++--------------------+
+| 1                  |
++--------------------+
+
+select casttodouble(now());
++---------------------+
+| casttodouble(now()) |
++---------------------+
+| 1447622306.031178   |
++---------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttofloat" audience="Cloudera">
+<dt>
+<codeph>casttofloat(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttofloat() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>FLOAT</codeph>. 
If the expression value is of a type that cannot be converted to the target 
type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>float</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttofloat(5);
++----------------+
+| casttofloat(5) |
++----------------+
+| 5              |
++----------------+
+
+select casttofloat('3.141');
++----------------------+
+| casttofloat('3.141') |
++----------------------+
+| 3.141000032424927    |
++----------------------+
+
+select casttofloat(1e6);
++-------------------+
+| casttofloat(1e+6) |
++-------------------+
+| 1000000           |
++-------------------+
+
+select casttofloat(true);
++-------------------+
+| casttofloat(true) |
++-------------------+
+| 1                 |
++-------------------+
+
+select casttofloat(now());
++--------------------+
+| casttofloat(now()) |
++--------------------+
+| 1447622400         |
++--------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttoint" audience="Cloudera">
+<dt>
+<codeph>casttoint(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttoint() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to <codeph>INT</codeph>. 
If the expression value is of a type that cannot be converted to the target 
type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>int</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttoint(5.4);
++----------------+
+| casttoint(5.4) |
++----------------+
+| 5              |
++----------------+
+
+select casttoint(true);
++-----------------+
+| casttoint(true) |
++-----------------+
+| 1               |
++-----------------+
+
+select casttoint(now());
++------------------+
+| casttoint(now()) |
++------------------+
+| 1447622487       |
++------------------+
+
+select casttoint('3.141');
++--------------------+
+| casttoint('3.141') |
++--------------------+
+| NULL               |
++--------------------+
+
+select casttoint('3');
++----------------+
+| casttoint('3') |
++----------------+
+| 3              |
++----------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttosmallint" audience="Cloudera">
+<dt>
+<codeph>casttosmallint(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttosmallint() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to 
<codeph>SMALLINT</codeph>. If the expression value is of a type that cannot be 
converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>smallint</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table big_types (x bigint, y int, z smallint);
+
+create table small_types as
+  select casttosmallint(x) as x, casttosmallint(y) as y, casttosmallint(z) as z
+    from big_types;
+
+describe small_types;
++------+----------+---------+
+| name | type     | comment |
++------+----------+---------+
+| x    | smallint |         |
+| y    | smallint |         |
+| z    | smallint |         |
++------+----------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttostring" audience="Cloudera">
+<dt>
+<codeph>casttostring(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttostring() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to 
<codeph>STRING</codeph>. If the expression value is of a type that cannot be 
converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>string</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table numeric_types (x int, y bigint, z tinyint);
+
+create table string_types as
+  select casttostring(x) as x, casttostring(y) as y, casttostring(z) as z
+    from numeric_types;
+
+describe string_types;
++------+--------+---------+
+| name | type   | comment |
++------+--------+---------+
+| x    | string |         |
+| y    | string |         |
+| z    | string |         |
++------+--------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttotimestamp" audience="Cloudera">
+<dt>
+<codeph>casttotimestamp(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttotimestamp() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to 
<codeph>TIMESTAMP</codeph>. If the expression value is of a type that cannot be 
converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>timestamp</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttotimestamp(1000);
++-----------------------+
+| casttotimestamp(1000) |
++-----------------------+
+| 1970-01-01 00:16:40   |
++-----------------------+
+
+select casttotimestamp(1000.0);
++-------------------------+
+| casttotimestamp(1000.0) |
++-------------------------+
+| 1970-01-01 00:16:40     |
++-------------------------+
+
+select casttotimestamp('1000');
++-------------------------+
+| casttotimestamp('1000') |
++-------------------------+
+| NULL                    |
++-------------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttotinyint" audience="Cloudera">
+<dt>
+<codeph>casttotinyint(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttotinyint() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to 
<codeph>TINYINT</codeph>. If the expression value is of a type that cannot be 
converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>tinyint</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>create table big_types (x bigint, y int, z smallint);
+
+create table tiny_types as
+  select casttotinyint(x) as x, casttotinyint(y) as y, casttotinyint(z) as z
+    from big_types;
+
+describe tiny_types;
++------+---------+---------+
+| name | type    | comment |
++------+---------+---------+
+| x    | tinyint |         |
+| y    | tinyint |         |
+| z    | tinyint |         |
++------+---------+---------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="casttovarchar" audience="Cloudera">
+<dt>
+<codeph>casttovarchar(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">casttovarchar() function</indexterm>
+<b>Purpose:</b> Converts the value of an expression to 
<codeph>VARCHAR</codeph>. If the expression value is of a type that cannot be 
converted to the target type, the result is <codeph>NULL</codeph>.
+<p><b>Return type:</b> <codeph>varchar</codeph></p>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_usage"/>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p conref="../shared/impala_common.xml#common/cast_convenience_fn_example"/>
+<codeblock>select casttovarchar('abcd');
++-----------------------+
+| casttovarchar('abcd') |
++-----------------------+
+| abcd                  |
++-----------------------+
+
+select casttovarchar(999);
++--------------------+
+| casttovarchar(999) |
++--------------------+
+| 999                |
++--------------------+
+
+select casttovarchar(999.5);
++----------------------+
+| casttovarchar(999.5) |
++----------------------+
+| 999.5                |
++----------------------+
+
+select casttovarchar(now());
++-------------------------------+
+| casttovarchar(now())          |
++-------------------------------+
+| 2015-11-15 21:26:13.528073000 |
++-------------------------------+
+
+select casttovarchar(true);
++---------------------+
+| casttovarchar(true) |
++---------------------+
+| 1                   |
++---------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+<dlentry rev="2.3.0" id="typeof">
+<dt>
+<codeph>typeof(type value)</codeph>
+</dt>
+<dd>
+<indexterm audience="Cloudera">typeof() function</indexterm>
+<b>Purpose:</b> Returns the name of the data type corresponding to an 
expression. For types with
+extra attributes, such as length for <codeph>CHAR</codeph> and 
<codeph>VARCHAR</codeph>,
+or precision and scale for <codeph>DECIMAL</codeph>, includes the full 
specification of the type.
+<!-- To do: How about for columns of complex types? Or fields within complex 
types? -->
+<p><b>Return type:</b> <codeph>string</codeph></p>
+<p><b>Usage notes:</b> Typically used in interactive exploration of a schema, 
or in application code that programmatically generates schema definitions such 
as <codeph>CREATE TABLE</codeph> statements.
+For example, previously, to understand the type of an expression such as
+<codeph>col1 / col2</codeph> or <codeph>concat(col1, col2, col3)</codeph>,
+you might have created a dummy table with a single row, using syntax such as 
<codeph>CREATE TABLE foo AS SELECT 5 / 3.0</codeph>,
+and then doing a <codeph>DESCRIBE</codeph> to see the type of the row.
+Or you might have done a <codeph>CREATE TABLE AS SELECT</codeph> operation to 
create a table and
+copy data into it, only learning the types of the columns by doing a 
<codeph>DESCRIBE</codeph> afterward.
+This technique is especially useful for arithmetic expressions involving 
<codeph>DECIMAL</codeph> types,
+because the precision and scale of the result is typically different than that 
of the operands.
+</p>
+<p conref="../shared/impala_common.xml#common/added_in_230"/>
+<p conref="../shared/impala_common.xml#common/example_blurb"/>
+<p>
+These examples show how to check the type of a simple literal or function 
value.
+Notice how adding even tiny integers together changes the data type of the 
result to
+avoid overflow, and how the results of arithmetic operations on 
<codeph>DECIMAL</codeph> values
+have specific precision and scale attributes.
+</p>
+<codeblock>select typeof(2)
++-----------+
+| typeof(2) |
++-----------+
+| TINYINT   |
++-----------+
+
+select typeof(2+2)
++---------------+
+| typeof(2 + 2) |
++---------------+
+| SMALLINT      |
++---------------+
+
+select typeof('xyz')
++---------------+
+| typeof('xyz') |
++---------------+
+| STRING        |
++---------------+
+
+select typeof(now())
++---------------+
+| typeof(now()) |
++---------------+
+| TIMESTAMP     |
++---------------+
+
+select typeof(5.3 / 2.1)
++-------------------+
+| typeof(5.3 / 2.1) |
++-------------------+
+| DECIMAL(6,4)      |
++-------------------+
+
+select typeof(5.30001 / 2342.1);
++--------------------------+
+| typeof(5.30001 / 2342.1) |
++--------------------------+
+| DECIMAL(13,11)           |
++--------------------------+
+
+select typeof(typeof(2+2))
++-----------------------+
+| typeof(typeof(2 + 2)) |
++-----------------------+
+| STRING                |
++-----------------------+
+</codeblock>
+
+<p>
+This example shows how even if you do not have a record of the type of a 
column,
+for example because the type was changed by <codeph>ALTER TABLE</codeph> after 
the
+original <codeph>CREATE TABLE</codeph>, you can still find out the type in a
+more compact form than examining the full <codeph>DESCRIBE</codeph> output.
+Remember to use <codeph>LIMIT 1</codeph> in such cases, to avoid an identical
+result value for every row in the table.
+</p>
+<codeblock>create table typeof_example (a int, b tinyint, c smallint, d 
bigint);
+
+/* Empty result set if there is no data in the table. */
+select typeof(a) from typeof_example;
+
+/* OK, now we have some data but the type of column A is being changed. */
+insert into typeof_example values (1, 2, 3, 4);
+alter table typeof_example change a a bigint;
+
+/* We can always find out the current type of that column without doing a full 
DESCRIBE. */
+select typeof(a) from typeof_example limit 1;
++-----------+
+| typeof(a) |
++-----------+
+| BIGINT    |
++-----------+
+</codeblock>
+<p>
+This example shows how you might programmatically generate a <codeph>CREATE 
TABLE</codeph> statement
+with the appropriate column definitions to hold the result values of arbitrary 
expressions.
+The <codeph>typeof()</codeph> function lets you construct a detailed 
<codeph>CREATE TABLE</codeph> statement
+without actually creating the table, as opposed to <codeph>CREATE TABLE AS 
SELECT</codeph> operations
+where you create the destination table but only learn the column data types 
afterward through <codeph>DESCRIBE</codeph>.
+</p>
+<codeblock>describe typeof_example;
++------+----------+---------+
+| name | type     | comment |
++------+----------+---------+
+| a    | bigint   |         |
+| b    | tinyint  |         |
+| c    | smallint |         |
+| d    | bigint   |         |
++------+----------+---------+
+
+/* An ETL or business intelligence tool might create variations on a table 
with different file formats,
+   different sets of columns, and so on. TYPEOF() lets an application 
introspect the types of the original columns. */
+select concat('create table derived_table (a ', typeof(a), ', b ', typeof(b), 
', c ',
+    typeof(c), ', d ', typeof(d), ') stored as parquet;')
+  as 'create table statement'
+from typeof_example limit 1;
++-------------------------------------------------------------------------------------------+
+| create table statement                                                       
             |
++-------------------------------------------------------------------------------------------+
+| create table derived_table (a BIGINT, b TINYINT, c SMALLINT, d BIGINT) 
stored as parquet; |
++-------------------------------------------------------------------------------------------+
+</codeblock>
+</dd>
+</dlentry>
+
+</dl>
+
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_count.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_count.xml b/docs/topics/impala_count.xml
new file mode 100644
index 0000000..2f3f519
--- /dev/null
+++ b/docs/topics/impala_count.xml
@@ -0,0 +1,230 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="count">
+
+  <title>COUNT Function</title>
+  <titlealts><navtitle>COUNT</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="Analytic Functions"/>
+      <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Querying"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">count() function</indexterm>
+      An aggregate function that returns the number of rows, or the number of 
non-<codeph>NULL</codeph> rows.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>COUNT([DISTINCT | ALL] <varname>expression</varname>) [OVER 
(<varname>analytic_clause</varname>)]</codeblock>
+
+    <p>
+      Depending on the argument, <codeph>COUNT()</codeph> considers rows that 
meet certain conditions:
+    </p>
+
+    <ul>
+      <li>
+        The notation <codeph>COUNT(*)</codeph> includes <codeph>NULL</codeph> 
values in the total.
+      </li>
+
+      <li>
+        The notation <codeph>COUNT(<varname>column_name</varname>)</codeph> 
only considers rows where the column
+        contains a non-<codeph>NULL</codeph> value.
+      </li>
+
+      <li>
+        You can also combine <codeph>COUNT</codeph> with the 
<codeph>DISTINCT</codeph> operator to eliminate
+        duplicates before counting, and to count the combinations of values 
across multiple columns.
+      </li>
+    </ul>
+
+    <p>
+      When the query contains a <codeph>GROUP BY</codeph> clause, returns one 
value for each combination of
+      grouping values.
+    </p>
+
+    <p>
+      <b>Return type:</b> <codeph>BIGINT</codeph>
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+    <p 
conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/>
+    
+    <p 
conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- How many rows total are in the table, regardless of NULL values?
+select count(*) from t1;
+-- How many rows are in the table with non-NULL values for a column?
+select count(c1) from t1;
+-- Count the rows that meet certain conditions.
+-- Again, * includes NULLs, so COUNT(*) might be greater than COUNT(col).
+select count(*) from t1 where x &gt; 10;
+select count(c1) from t1 where x &gt; 10;
+-- Can also be used in combination with DISTINCT and/or GROUP BY.
+-- Combine COUNT and DISTINCT to find the number of unique values.
+-- Must use column names rather than * with COUNT(DISTINCT ...) syntax.
+-- Rows with NULL values are not counted.
+select count(distinct c1) from t1;
+-- Rows with a NULL value in _either_ column are not counted.
+select count(distinct c1, c2) from t1;
+-- Return more than one result.
+select month, year, count(distinct visitor_id) from web_stats group by month, 
year;
+</codeblock>
+
+    <p rev="2.0.0">
+      The following examples show how to use <codeph>COUNT()</codeph> in an 
analytic context. They use a table
+      containing integers from 1 to 10. Notice how the 
<codeph>COUNT()</codeph> is reported for each input value, as
+      opposed to the <codeph>GROUP BY</codeph> clause which condenses the 
result set.
+<codeblock>select x, property, count(x) over (partition by property) as count 
from int_t where property in ('odd','even');
++----+----------+-------+
+| x  | property | count |
++----+----------+-------+
+| 2  | even     | 5     |
+| 4  | even     | 5     |
+| 6  | even     | 5     |
+| 8  | even     | 5     |
+| 10 | even     | 5     |
+| 1  | odd      | 5     |
+| 3  | odd      | 5     |
+| 5  | odd      | 5     |
+| 7  | odd      | 5     |
+| 9  | odd      | 5     |
++----+----------+-------+
+</codeblock>
+
+Adding an <codeph>ORDER BY</codeph> clause lets you experiment with results 
that are cumulative or apply to a moving
+set of rows (the <q>window</q>). The following examples use 
<codeph>COUNT()</codeph> in an analytic context
+(that is, with an <codeph>OVER()</codeph> clause) to produce a running count 
of all the even values,
+then a running count of all the odd values. The basic <codeph>ORDER BY 
x</codeph> clause implicitly
+activates a window clause of <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW</codeph>,
+which is effectively the same as <codeph>ROWS BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW</codeph>,
+therefore all of these examples produce the same results:
+<codeblock>select x, property,
+  count(x) over (partition by property <b>order by x</b>) as 'cumulative count'
+  from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative count |
++----+----------+------------------+
+| 2  | even     | 1                |
+| 4  | even     | 2                |
+| 6  | even     | 3                |
+| 8  | even     | 4                |
+| 10 | even     | 5                |
+| 1  | odd      | 1                |
+| 3  | odd      | 2                |
+| 5  | odd      | 3                |
+| 7  | odd      | 4                |
+| 9  | odd      | 5                |
++----+----------+------------------+
+
+select x, property,
+  count(x) over
+  (
+    partition by property
+    <b>order by x</b>
+    <b>range between unbounded preceding and current row</b>
+  ) as 'cumulative total'
+from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative count |
++----+----------+------------------+
+| 2  | even     | 1                |
+| 4  | even     | 2                |
+| 6  | even     | 3                |
+| 8  | even     | 4                |
+| 10 | even     | 5                |
+| 1  | odd      | 1                |
+| 3  | odd      | 2                |
+| 5  | odd      | 3                |
+| 7  | odd      | 4                |
+| 9  | odd      | 5                |
++----+----------+------------------+
+
+select x, property,
+  count(x) over
+  (
+    partition by property
+    <b>order by x</b>
+    <b>rows between unbounded preceding and current row</b>
+  ) as 'cumulative total'
+  from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative count |
++----+----------+------------------+
+| 2  | even     | 1                |
+| 4  | even     | 2                |
+| 6  | even     | 3                |
+| 8  | even     | 4                |
+| 10 | even     | 5                |
+| 1  | odd      | 1                |
+| 3  | odd      | 2                |
+| 5  | odd      | 3                |
+| 7  | odd      | 4                |
+| 9  | odd      | 5                |
++----+----------+------------------+
+</codeblock>
+
+The following examples show how to construct a moving window, with a running 
count taking into account 1 row before
+and 1 row after the current row, within the same partition (all the even 
values or all the odd values).
+Therefore, the count is consistently 3 for rows in the middle of the window, 
and 2 for
+rows near the ends of the window, where there is no preceding or no following 
row in the partition.
+Because of a restriction in the Impala <codeph>RANGE</codeph> syntax, this 
type of
+moving window is possible with the <codeph>ROWS BETWEEN</codeph> clause but 
not the <codeph>RANGE BETWEEN</codeph>
+clause:
+<codeblock>select x, property,
+  count(x) over
+  (
+    partition by property
+    <b>order by x</b>
+    <b>rows between 1 preceding and 1 following</b>
+  ) as 'moving total'
+  from int_t where property in ('odd','even');
++----+----------+--------------+
+| x  | property | moving total |
++----+----------+--------------+
+| 2  | even     | 2            |
+| 4  | even     | 3            |
+| 6  | even     | 3            |
+| 8  | even     | 3            |
+| 10 | even     | 2            |
+| 1  | odd      | 2            |
+| 3  | odd      | 3            |
+| 5  | odd      | 3            |
+| 7  | odd      | 3            |
+| 9  | odd      | 2            |
++----+----------+--------------+
+
+-- Doesn't work because of syntax restriction on RANGE clause.
+select x, property,
+  count(x) over
+  (
+    partition by property
+    <b>order by x</b>
+    <b>range between 1 preceding and 1 following</b>
+  ) as 'moving total'
+from int_t where property in ('odd','even');
+ERROR: AnalysisException: RANGE is only supported with both the lower and 
upper bounds UNBOUNDED or one UNBOUNDED and the other CURRENT ROW.
+</codeblock>
+    </p>
+
+    <note conref="../shared/impala_common.xml#common/multiple_count_distinct"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_analytic_functions.xml#analytic_functions"/>
+    </p>
+
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_create_database.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_create_database.xml 
b/docs/topics/impala_create_database.xml
new file mode 100644
index 0000000..f4153e0
--- /dev/null
+++ b/docs/topics/impala_create_database.xml
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="create_database">
+
+  <title>CREATE DATABASE Statement</title>
+  <titlealts><navtitle>CREATE DATABASE</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Databases"/>
+      <data name="Category" value="Schemas"/>
+      <data name="Category" value="DDL"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">CREATE DATABASE statement</indexterm>
+      Creates a new database.
+    </p>
+
+    <p>
+      In Impala, a database is both:
+    </p>
+
+    <ul>
+      <li>
+        A logical construct for grouping together related tables, views, and 
functions within their own namespace.
+        You might use a separate database for each application, set of related 
tables, or round of experimentation.
+      </li>
+
+      <li>
+        A physical construct represented by a directory tree in HDFS. Tables 
(internal tables), partitions, and
+        data files are all located under this directory. You can perform 
HDFS-level operations such as backing it up and measuring space usage,
+        or remove it with a <codeph>DROP DATABASE</codeph> statement.
+      </li>
+    </ul>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] 
<varname>database_name</varname>[COMMENT '<varname>database_comment</varname>']
+  [LOCATION <varname>hdfs_path</varname>];</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+    <p>
+      A database is physically represented as a directory in HDFS, with a 
filename extension <codeph>.db</codeph>,
+      under the main Impala data directory. If the associated HDFS directory 
does not exist, it is created for you.
+      All databases and their associated directories are top-level objects, 
with no physical or logical nesting.
+    </p>
+
+    <p>
+      After creating a database, to make it the current database within an 
<cmdname>impala-shell</cmdname> session,
+      use the <codeph>USE</codeph> statement. You can refer to tables in the 
current database without prepending
+      any qualifier to their names.
+    </p>
+
+    <p>
+      When you first connect to Impala through 
<cmdname>impala-shell</cmdname>, the database you start in (before
+      issuing any <codeph>CREATE DATABASE</codeph> or <codeph>USE</codeph> 
statements) is named
+      <codeph>default</codeph>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/builtins_db"/>
+
+    <p>
+      After creating a database, your <cmdname>impala-shell</cmdname> session 
or another
+      <cmdname>impala-shell</cmdname> connected to the same node can 
immediately access that database. To access
+      the database through the Impala daemon on a different node, issue the 
<codeph>INVALIDATE METADATA</codeph>
+      statement first while connected to that other node.
+    </p>
+
+    <p>
+      Setting the <codeph>LOCATION</codeph> attribute for a new database is a 
way to work with sets of files in an
+      HDFS directory structure outside the default Impala data directory, as 
opposed to setting the
+      <codeph>LOCATION</codeph> attribute for each individual table.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/hive_blurb"/>
+
+    <p>
+      When you create a database in Impala, the database can also be used by 
Hive.
+      When you create a database in Hive, issue an <codeph>INVALIDATE 
METADATA</codeph>
+      statement in Impala to make Impala permanently aware of the new 
database. 
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+    <p conref="../shared/impala_common.xml#common/permissions_blurb"/>
+    <p rev="CDH-19187">
+      The user ID that the <cmdname>impalad</cmdname> daemon runs under,
+      typically the <codeph>impala</codeph> user, must have write
+      permission for the parent HDFS directory under which the database
+      is located.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <codeblock 
conref="../shared/impala_common.xml#common/create_drop_db_example"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_databases.xml#databases"/>, <xref 
href="impala_drop_database.xml#drop_database"/>,
+      <xref href="impala_use.xml#use"/>, <xref 
href="impala_show.xml#show_databases"/>,
+      <xref href="impala_tables.xml#tables"/>
+    </p>
+  </conbody>
+</concept>

[16/22] incubator-impala git commit: First try at porting over the source files necessary for the Impala SQL Reference.

Reply via email to