http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_double.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_double.xml b/docs/topics/impala_double.xml new file mode 100644 index 0000000..f1d1756 --- /dev/null +++ b/docs/topics/impala_double.xml @@ -0,0 +1,100 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="double"> + + <title>DOUBLE Data Type</title> + <titlealts><navtitle>DOUBLE</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Data Types"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + A double precision floating-point data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER + TABLE</codeph> statements. + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + + <p> + In the column definition of a <codeph>CREATE TABLE</codeph> statement: + </p> + +<codeblock><varname>column_name</varname> DOUBLE</codeblock> + + <p> + <b>Range:</b> 4.94065645841246544e-324d .. 1.79769313486231570e+308, positive or negative + </p> + + <p> + <b>Precision:</b> 15 to 17 significant digits, depending on usage. The number of significant digits does + not depend on the position of the decimal point. + </p> + + <p> + <b>Representation:</b> The values are stored in 8 bytes, using + <xref href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format" scope="external" format="html">IEEE 754 Double Precision Binary Floating Point</xref> format. + </p> + + <p> + <b>Conversions:</b> Impala does not automatically convert <codeph>DOUBLE</codeph> to any other type. 
You can + use <codeph>CAST()</codeph> to convert <codeph>DOUBLE</codeph> values to <codeph>FLOAT</codeph>, + <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, <codeph>INT</codeph>, <codeph>BIGINT</codeph>, + <codeph>STRING</codeph>, <codeph>TIMESTAMP</codeph>, or <codeph>BOOLEAN</codeph>. You can use exponential + notation in <codeph>DOUBLE</codeph> literals or when casting from <codeph>STRING</codeph>, for example + <codeph>1.0e6</codeph> to represent one million. + <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/> + </p> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + The data type <codeph>REAL</codeph> is an alias for <codeph>DOUBLE</codeph>. + </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + +<codeblock>CREATE TABLE t1 (x DOUBLE); +SELECT CAST(1000.5 AS DOUBLE); +</codeblock> + + <p conref="../shared/impala_common.xml#common/partitioning_imprecise"/> + + <p conref="../shared/impala_common.xml#common/hbase_ok"/> + + <p conref="../shared/impala_common.xml#common/parquet_ok"/> + + <p conref="../shared/impala_common.xml#common/text_bulky"/> + +<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> --> + + <p conref="../shared/impala_common.xml#common/internals_8_bytes"/> + +<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> --> + + <p conref="../shared/impala_common.xml#common/column_stats_constant"/> + + <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> + +<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. 
--> + + <p conref="../shared/impala_common.xml#common/sum_double"/> + + <p conref="../shared/impala_common.xml#common/float_double_decimal_caveat"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_math_functions.xml#math_functions"/>, + <xref href="impala_float.xml#float"/> + </p> + </conbody> +</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_database.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_drop_database.xml b/docs/topics/impala_drop_database.xml new file mode 100644 index 0000000..c6a1b64 --- /dev/null +++ b/docs/topics/impala_drop_database.xml @@ -0,0 +1,124 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="drop_database"> + + <title>DROP DATABASE Statement</title> + <titlealts><navtitle>DROP DATABASE</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Databases"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">DROP DATABASE statement</indexterm> + Removes a database from the system. The physical operations involve removing the metadata for the database + from the metastore, and deleting the corresponding <codeph>*.db</codeph> directory from HDFS. + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>DROP (DATABASE|SCHEMA) [IF EXISTS] <varname>database_name</varname> <ph rev="2.3.0">[RESTRICT | CASCADE]</ph>;</codeblock> + + <p conref="../shared/impala_common.xml#common/ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + By default, the database must be empty before it can be dropped, to avoid losing any data. + </p> + + <p rev="2.3.0"> + In CDH 5.5 / Impala 2.3 and higher, you can include the <codeph>CASCADE</codeph> + clause to make Impala drop all tables and other objects in the database before dropping the database itself. + The <codeph>RESTRICT</codeph> clause enforces the original requirement that the database be empty + before being dropped. 
Because the <codeph>RESTRICT</codeph> behavior is still the default, this + clause is optional. + </p> + + <p rev="2.3.0"> + The automatic dropping resulting from the <codeph>CASCADE</codeph> clause follows the same rules as the + corresponding <codeph>DROP TABLE</codeph>, <codeph>DROP VIEW</codeph>, and <codeph>DROP FUNCTION</codeph> statements. + In particular, the HDFS directories and data files for any external tables are left behind when the + tables are removed. + </p> + + <p> + When you do not use the <codeph>CASCADE</codeph> clause, drop or move all the objects inside the database manually + before dropping the database itself: + </p> + + <ul> + <li> + <p> + Use the <codeph>SHOW TABLES</codeph> statement to locate all tables and views in the database, + and issue <codeph>DROP TABLE</codeph> and <codeph>DROP VIEW</codeph> statements to remove them all. + </p> + </li> + <li> + <p> + Use the <codeph>SHOW FUNCTIONS</codeph> and <codeph>SHOW AGGREGATE FUNCTIONS</codeph> statements + to locate all user-defined functions in the database, and issue <codeph>DROP FUNCTION</codeph> + and <codeph>DROP AGGREGATE FUNCTION</codeph> statements to remove them all. + </p> + </li> + <li> + <p> + To keep tables or views contained by a database while removing the database itself, use + <codeph>ALTER TABLE</codeph> and <codeph>ALTER VIEW</codeph> to move the relevant + objects to a different database before dropping the original database. + </p> + </li> + </ul> + + <p> + You cannot drop the current database, that is, the database your session connected to + either through the <codeph>USE</codeph> statement or the <codeph>-d</codeph> option of <cmdname>impala-shell</cmdname>. + Issue a <codeph>USE</codeph> statement to switch to a different database first. + Because the <codeph>default</codeph> database is always available, issuing + <codeph>USE default</codeph> is a convenient way to leave the current database + before dropping it. 
+ </p> + + <p conref="../shared/impala_common.xml#common/hive_blurb"/> + + <p> + When you drop a database in Impala, the database can no longer be used by Hive. + </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + +<!-- Better to conref the same examples in both places. --> + + <p> + See <xref href="impala_create_database.xml#create_database"/> for examples covering <codeph>CREATE + DATABASE</codeph>, <codeph>USE</codeph>, and <codeph>DROP DATABASE</codeph>. + </p> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb"/> + <p rev="CDH-19187"> + The user ID that the <cmdname>impalad</cmdname> daemon runs under, + typically the <codeph>impala</codeph> user, must have write + permission for the directory associated with the database. + </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <codeblock conref="../shared/impala_common.xml#common/create_drop_db_example"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_databases.xml#databases"/>, <xref href="impala_create_database.xml#create_database"/>, + <xref href="impala_use.xml#use"/>, <xref href="impala_show.xml#show_databases"/>, <xref href="impala_drop_table.xml#drop_table"/> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_function.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_drop_function.xml b/docs/topics/impala_drop_function.xml new file mode 100644 index 0000000..51a4d90 --- /dev/null +++ b/docs/topics/impala_drop_function.xml @@ -0,0 +1,60 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="1.2" id="drop_function"> + + <title>DROP FUNCTION Statement</title> + <titlealts><navtitle>DROP FUNCTION</navtitle></titlealts> + 
<prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Impala Functions"/> + <data name="Category" value="UDFs"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">DROP FUNCTION statement</indexterm> + Removes a user-defined function (UDF), so that it is not available for execution during Impala + <codeph>SELECT</codeph> or <codeph>INSERT</codeph> operations. + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>DROP [AGGREGATE] FUNCTION [IF EXISTS] [<varname>db_name</varname>.]<varname>function_name</varname>(<varname>type</varname>[, <varname>type</varname>...])</codeblock> + + <p conref="../shared/impala_common.xml#common/ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + Because the same function name could be overloaded with different argument signatures, you specify the + argument types to identify the exact function to drop. + </p> + + <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> + + <p conref="../shared/impala_common.xml#common/udf_persistence_restriction"/> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb"/> + <p rev="CDH-19187"> + The user ID that the <cmdname>impalad</cmdname> daemon runs under, + typically the <codeph>impala</codeph> user, does not need any + particular HDFS permissions to perform this statement. + All read and write operations are on the metastore database, + not HDFS files and directories. 
+ </p> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_udf.xml#udfs"/>, <xref href="impala_create_function.xml#create_function"/> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_role.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_drop_role.xml b/docs/topics/impala_drop_role.xml new file mode 100644 index 0000000..35d2157 --- /dev/null +++ b/docs/topics/impala_drop_role.xml @@ -0,0 +1,67 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="1.4.0" id="drop_role"> + + <title>DROP ROLE Statement (CDH 5.2 or higher only)</title> + <titlealts><navtitle>DROP ROLE (CDH 5.2 or higher only)</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="DDL"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Sentry"/> + <data name="Category" value="Roles"/> + <!-- Consider whether to go deeper into categories like Security for the Sentry-related statements. --> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">DROP ROLE statement</indexterm> +<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. --> + The <codeph>DROP ROLE</codeph> statement removes a role from the metastore database. Once dropped, the role + is revoked for all users to whom it was previously assigned, and all privileges granted to that role are + revoked. Queries that are already executing are not affected. Impala verifies the role information + approximately every 60 seconds, so the effects of <codeph>DROP ROLE</codeph> might not take effect for new + Impala queries for a brief period. 
+ </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>DROP ROLE <varname>role_name</varname> +</codeblock> + + <p conref="../shared/impala_common.xml#common/privileges_blurb"/> + + <p> + Only administrative users (initially, a predefined set of users specified in the Sentry service configuration + file) can use this statement. + </p> + + <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> + + <p> + Impala makes use of any roles and privileges specified by the <codeph>GRANT</codeph> and + <codeph>REVOKE</codeph> statements in Hive, and Hive makes use of any roles and privileges specified by the + <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements in Impala. The Impala <codeph>GRANT</codeph> + and <codeph>REVOKE</codeph> statements for privileges do not require the <codeph>ROLE</codeph> keyword to be + repeated before each role name, unlike the equivalent Hive statements. + </p> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_authorization.xml#authorization"/>, <xref href="impala_grant.xml#grant"/>, + <xref href="impala_revoke.xml#revoke"/>, <xref href="impala_create_role.xml#create_role"/>, + <xref href="impala_show.xml#show"/> + </p> + +<!-- To do: nail down the new SHOW syntax, e.g. SHOW ROLES, SHOW CURRENT ROLES, SHOW GROUPS. 
--> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_stats.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_drop_stats.xml b/docs/topics/impala_drop_stats.xml new file mode 100644 index 0000000..56697f4 --- /dev/null +++ b/docs/topics/impala_drop_stats.xml @@ -0,0 +1,275 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="2.1.0" id="drop_stats" xml:lang="en-US"> + + <title>DROP STATS Statement</title> + <titlealts><navtitle>DROP STATS</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Performance"/> + <data name="Category" value="Scalability"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">DROP STATS statement</indexterm> + Removes the specified statistics from a table or partition. The statistics were originally created by the + <codeph>COMPUTE STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph> statement. 
+ </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock rev="2.1.0">DROP STATS [<varname>database_name</varname>.]<varname>table_name</varname> +DROP INCREMENTAL STATS [<varname>database_name</varname>.]<varname>table_name</varname> PARTITION (<varname>partition_spec</varname>) + +<varname>partition_spec</varname> ::= <varname>partition_col</varname>=<varname>constant_value</varname> +</codeblock> + + <p conref="../shared/impala_common.xml#common/incremental_partition_spec"/> + + <p> + <codeph>DROP STATS</codeph> removes all statistics from the table, whether created by <codeph>COMPUTE + STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph>. + </p> + + <p rev="2.1.0"> + <codeph>DROP INCREMENTAL STATS</codeph> only affects incremental statistics for a single partition, specified + through the <codeph>PARTITION</codeph> clause. The incremental stats are marked as outdated, so that they are + recomputed by the next <codeph>COMPUTE INCREMENTAL STATS</codeph> statement. + </p> + +<!-- To do: what release was this added in? --> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + You typically use this statement when the statistics for a table or a partition have become stale due to data + files being added to or removed from the associated HDFS data directories, whether by manual HDFS operations + or <codeph>INSERT</codeph>, <codeph>INSERT OVERWRITE</codeph>, or <codeph>LOAD DATA</codeph> statements, or + adding or dropping partitions. + </p> + + <p> + When a table or partition has no associated statistics, Impala treats it as essentially zero-sized when + constructing the execution plan for a query. In particular, the statistics influence the order in which + tables are joined in a join query. 
To ensure proper query planning and good query performance and + scalability, make sure to run <codeph>COMPUTE STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph> on + the table or partition after removing any stale statistics. + </p> + + <p> + Dropping the statistics is not required for an unpartitioned table or a partitioned table covered by the + original type of statistics. A subsequent <codeph>COMPUTE STATS</codeph> statement replaces any existing + statistics with new ones, for all partitions, regardless of whether the old ones were outdated. Therefore, + this statement was rarely used before the introduction of incremental statistics. + </p> + + <p> + Dropping the statistics is required for a partitioned table containing incremental statistics, to make a + subsequent <codeph>COMPUTE INCREMENTAL STATS</codeph> statement rescan an existing partition. See + <xref href="impala_perf_stats.xml#perf_stats"/> for information about incremental statistics, a new feature + available in Impala 2.1.0 and higher. + </p> + + <p conref="../shared/impala_common.xml#common/ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb"/> + <p rev="CDH-19187"> + The user ID that the <cmdname>impalad</cmdname> daemon runs under, + typically the <codeph>impala</codeph> user, does not need any + particular HDFS permissions to perform this statement. + All read and write operations are on the metastore database, + not HDFS files and directories. + </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p> + The following example shows a partitioned table that has associated statistics produced by the + <codeph>COMPUTE INCREMENTAL STATS</codeph> statement, and how the situation evolves as statistics are dropped + from specific partitions, then the entire table. + </p> + + <p> + Initially, all table and column statistics are filled in. 
+ </p> + +<!-- Note: chopped off any excess characters at position 87 and after, + to avoid weird wrapping in PDF. + Applies to any subsequent examples with output from SHOW ... STATS too. --> + +<codeblock>show table stats item_partitioned; ++-------------+-------+--------+----------+--------------+---------+------------------ +| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats ++-------------+-------+--------+----------+--------------+---------+------------------ +| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true +| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true +| Electronics | 1812 | 1 | 232.67KB | NOT CACHED | PARQUET | true +| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true +| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true +| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true +| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true +| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true +| Sports | 1783 | 1 | 227.97KB | NOT CACHED | PARQUET | true +| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true +| Total | 17957 | 10 | 2.25MB | 0B | | ++-------------+-------+--------+----------+--------------+---------+------------------ +show column stats item_partitioned; ++------------------+-----------+------------------+--------+----------+--------------- +| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size ++------------------+-----------+------------------+--------+----------+--------------- +| i_item_sk | INT | 19443 | -1 | 4 | 4 +| i_item_id | STRING | 9025 | -1 | 16 | 16 +| i_rec_start_date | TIMESTAMP | 4 | -1 | 16 | 16 +| i_rec_end_date | TIMESTAMP | 3 | -1 | 16 | 16 +| i_item_desc | STRING | 13330 | -1 | 200 | 100.3028030395 +| i_current_price | FLOAT | 2807 | -1 | 4 | 4 +| i_wholesale_cost | FLOAT | 2105 | -1 | 4 | 4 +| i_brand_id | INT | 965 | -1 | 4 | 4 +| i_brand | STRING | 725 | -1 | 22 | 16.17760086059 +| i_class_id | INT | 16 | -1 | 4 | 4 +| 
i_class | STRING | 101 | -1 | 15 | 7.767499923706 +| i_category_id | INT | 10 | -1 | 4 | 4 +| i_manufact_id | INT | 1857 | -1 | 4 | 4 +| i_manufact | STRING | 1028 | -1 | 15 | 11.32950019836 +| i_size | STRING | 8 | -1 | 11 | 4.334599971771 +| i_formulation | STRING | 12884 | -1 | 20 | 19.97999954223 +| i_color | STRING | 92 | -1 | 10 | 5.380899906158 +| i_units | STRING | 22 | -1 | 7 | 4.186900138854 +| i_container | STRING | 2 | -1 | 7 | 6.992599964141 +| i_manager_id | INT | 105 | -1 | 4 | 4 +| i_product_name | STRING | 19094 | -1 | 25 | 18.02330017089 +| i_category | STRING | 10 | 0 | -1 | -1 ++------------------+-----------+------------------+--------+----------+--------------- +</codeblock> + + <p> + To remove statistics for particular partitions, use the <codeph>DROP INCREMENTAL STATS</codeph> statement. + After removing statistics for two partitions, the table-level statistics reflect that change in the + <codeph>#Rows</codeph> and <codeph>Incremental stats</codeph> fields. The counts, maximums, and averages of + the column-level statistics are unaffected. + </p> + + <note> + (It is possible that the row count might be preserved in future after a <codeph>DROP INCREMENTAL + STATS</codeph> statement. Check the resolution of the issue + <xref href="https://issues.cloudera.org/browse/IMPALA-1615" scope="external" format="html">IMPALA-1615</xref>.) 
+ </note> + +<codeblock>drop incremental stats item_partitioned partition (i_category='Sports'); +drop incremental stats item_partitioned partition (i_category='Electronics'); + +show table stats item_partitioned ++-------------+-------+--------+----------+--------------+---------+------------------ +| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats ++-------------+-------+--------+----------+--------------+---------+------------------ +| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true +| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true +| Electronics | -1 | 1 | 232.67KB | NOT CACHED | PARQUET | false +| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true +| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true +| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true +| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true +| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true +| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false +| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true +| Total | 17957 | 10 | 2.25MB | 0B | | ++-------------+-------+--------+----------+--------------+---------+------------------ +show column stats item_partitioned ++------------------+-----------+------------------+--------+----------+--------------- +| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size ++------------------+-----------+------------------+--------+----------+--------------- +| i_item_sk | INT | 19443 | -1 | 4 | 4 +| i_item_id | STRING | 9025 | -1 | 16 | 16 +| i_rec_start_date | TIMESTAMP | 4 | -1 | 16 | 16 +| i_rec_end_date | TIMESTAMP | 3 | -1 | 16 | 16 +| i_item_desc | STRING | 13330 | -1 | 200 | 100.3028030395 +| i_current_price | FLOAT | 2807 | -1 | 4 | 4 +| i_wholesale_cost | FLOAT | 2105 | -1 | 4 | 4 +| i_brand_id | INT | 965 | -1 | 4 | 4 +| i_brand | STRING | 725 | -1 | 22 | 16.17760086059 +| i_class_id | INT | 16 | -1 | 4 | 4 +| i_class | STRING | 101 | -1 | 15 
| 7.767499923706 +| i_category_id | INT | 10 | -1 | 4 | 4 +| i_manufact_id | INT | 1857 | -1 | 4 | 4 +| i_manufact | STRING | 1028 | -1 | 15 | 11.32950019836 +| i_size | STRING | 8 | -1 | 11 | 4.334599971771 +| i_formulation | STRING | 12884 | -1 | 20 | 19.97999954223 +| i_color | STRING | 92 | -1 | 10 | 5.380899906158 +| i_units | STRING | 22 | -1 | 7 | 4.186900138854 +| i_container | STRING | 2 | -1 | 7 | 6.992599964141 +| i_manager_id | INT | 105 | -1 | 4 | 4 +| i_product_name | STRING | 19094 | -1 | 25 | 18.02330017089 +| i_category | STRING | 10 | 0 | -1 | -1 ++------------------+-----------+------------------+--------+----------+--------------- +</codeblock> + <p> + To remove all statistics from the table, whether produced by <codeph>COMPUTE STATS</codeph> or + <codeph>COMPUTE INCREMENTAL STATS</codeph>, use the <codeph>DROP STATS</codeph> statement without the + <codeph>INCREMENTAL</codeph> clause. Now, both table-level and column-level statistics are reset. + </p> + +<codeblock>drop stats item_partitioned; + +show table stats item_partitioned ++-------------+-------+--------+----------+--------------+---------+------------------ +| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats ++-------------+-------+--------+----------+--------------+---------+------------------ +| Books | -1 | 1 | 223.74KB | NOT CACHED | PARQUET | false +| Children | -1 | 1 | 230.05KB | NOT CACHED | PARQUET | false +| Electronics | -1 | 1 | 232.67KB | NOT CACHED | PARQUET | false +| Home | -1 | 1 | 232.56KB | NOT CACHED | PARQUET | false +| Jewelry | -1 | 1 | 223.72KB | NOT CACHED | PARQUET | false +| Men | -1 | 1 | 231.25KB | NOT CACHED | PARQUET | false +| Music | -1 | 1 | 237.90KB | NOT CACHED | PARQUET | false +| Shoes | -1 | 1 | 234.90KB | NOT CACHED | PARQUET | false +| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false +| Women | -1 | 1 | 226.27KB | NOT CACHED | PARQUET | false +| Total | -1 | 10 | 2.25MB | 0B | | 
++-------------+-------+--------+----------+--------------+---------+------------------ +show column stats item_partitioned ++------------------+-----------+------------------+--------+----------+----------+ +| Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size | ++------------------+-----------+------------------+--------+----------+----------+ +| i_item_sk | INT | -1 | -1 | 4 | 4 | +| i_item_id | STRING | -1 | -1 | -1 | -1 | +| i_rec_start_date | TIMESTAMP | -1 | -1 | 16 | 16 | +| i_rec_end_date | TIMESTAMP | -1 | -1 | 16 | 16 | +| i_item_desc | STRING | -1 | -1 | -1 | -1 | +| i_current_price | FLOAT | -1 | -1 | 4 | 4 | +| i_wholesale_cost | FLOAT | -1 | -1 | 4 | 4 | +| i_brand_id | INT | -1 | -1 | 4 | 4 | +| i_brand | STRING | -1 | -1 | -1 | -1 | +| i_class_id | INT | -1 | -1 | 4 | 4 | +| i_class | STRING | -1 | -1 | -1 | -1 | +| i_category_id | INT | -1 | -1 | 4 | 4 | +| i_manufact_id | INT | -1 | -1 | 4 | 4 | +| i_manufact | STRING | -1 | -1 | -1 | -1 | +| i_size | STRING | -1 | -1 | -1 | -1 | +| i_formulation | STRING | -1 | -1 | -1 | -1 | +| i_color | STRING | -1 | -1 | -1 | -1 | +| i_units | STRING | -1 | -1 | -1 | -1 | +| i_container | STRING | -1 | -1 | -1 | -1 | +| i_manager_id | INT | -1 | -1 | 4 | 4 | +| i_product_name | STRING | -1 | -1 | -1 | -1 | +| i_category | STRING | 10 | 0 | -1 | -1 | ++------------------+-----------+------------------+--------+----------+----------+ +</codeblock> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_compute_stats.xml#compute_stats"/>, <xref href="impala_show.xml#show_table_stats"/>, + <xref href="impala_show.xml#show_column_stats"/>, <xref href="impala_perf_stats.xml#perf_stats"/> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_table.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_drop_table.xml 
b/docs/topics/impala_drop_table.xml new file mode 100644 index 0000000..33cb726 --- /dev/null +++ b/docs/topics/impala_drop_table.xml @@ -0,0 +1,142 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="drop_table"> + + <title>DROP TABLE Statement</title> + <titlealts><navtitle>DROP TABLE</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">DROP TABLE statement</indexterm> + Removes an Impala table. Also removes the underlying HDFS data files for internal tables, although not for + external tables. + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>DROP TABLE [IF EXISTS] [<varname>db_name</varname>.]<varname>table_name</varname> <ph rev="2.3.0">[PURGE]</ph></codeblock> + + <p> + <b>IF EXISTS clause:</b> + </p> + + <p> + The optional <codeph>IF EXISTS</codeph> clause makes the statement succeed whether or not the table exists. + If the table does exist, it is dropped; if it does not exist, the statement has no effect. This capability is + useful in standardized setup scripts that remove existing schema objects and create new ones. By using some + combination of <codeph>IF EXISTS</codeph> for the <codeph>DROP</codeph> statements and <codeph>IF NOT + EXISTS</codeph> clauses for the <codeph>CREATE</codeph> statements, the script can run successfully the first + time you run it (when the objects do not exist yet) and subsequent times (when some or all of the objects do + already exist). 
+ </p> + + <p rev="2.3.0"> + <b>PURGE clause:</b> + </p> + + <p rev="2.3.0"> + The optional <codeph>PURGE</codeph> keyword, available in CDH 5.5 / Impala 2.3 and higher, + causes Impala to remove the associated HDFS data files + immediately, rather than going through the HDFS trashcan mechanism. Use this keyword when dropping + a table if it is crucial to remove the data as quickly as possible to free up space, or if there is + a problem with the trashcan, such as the trashcan not being configured or being in a different + HDFS encryption zone than the data files. + </p> + + <p conref="../shared/impala_common.xml#common/ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + By default, Impala removes the associated HDFS directory and data files for the table. If you issue a + <codeph>DROP TABLE</codeph> and the data files are not deleted, it might be for the following reasons: + </p> + + <ul> + <li> + If the table was created with the + <codeph><xref href="impala_tables.xml#external_tables">EXTERNAL</xref></codeph> clause, Impala leaves all + files and directories untouched. Use external tables when the data is under the control of other Hadoop + components, and Impala is only used to query the data files from their original locations. + </li> + + <li> + Impala might leave the data files behind unintentionally, if there is no HDFS location available to hold + the HDFS trashcan for the <codeph>impala</codeph> user. See + <xref href="impala_prereqs.xml#prereqs_account"/> for the procedure to set up the required HDFS home + directory. + </li> + </ul> + + <p> + Make sure that you are in the correct database before dropping a table, either by issuing a + <codeph>USE</codeph> statement first or by using a fully qualified name + <codeph><varname>db_name</varname>.<varname>table_name</varname></codeph>. 
+ </p> + + <p> + If you intend to issue a <codeph>DROP DATABASE</codeph> statement, first issue <codeph>DROP TABLE</codeph> + statements to remove all the tables in that database. + </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + +<codeblock>create database temporary; +use temporary; +create table unimportant (x int); +create table trivial (s string); +-- Drop a table in the current database. +drop table unimportant; +-- Switch to a different database. +use default; +-- To drop a table in a different database... +drop table trivial; +<i>ERROR: AnalysisException: Table does not exist: default.trivial</i> +-- ...use a fully qualified name. +drop table temporary.trivial;</codeblock> + + <p conref="../shared/impala_common.xml#common/disk_space_blurb"/> + + <p conref="../shared/impala_common.xml#common/s3_blurb"/> + <p rev="2.2.0"> + Although Impala cannot write new data to a table stored in the Amazon + S3 filesystem, the <codeph>DROP TABLE</codeph> statement can remove data files from S3 + if the associated S3 table is an internal table. + See <xref href="impala_s3.xml#s3"/> for details about working with S3 tables. + </p> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb"/> + <p rev="CDH-19187"> + For an internal table, the user ID that the <cmdname>impalad</cmdname> daemon runs under, + typically the <codeph>impala</codeph> user, must have write + permission for all the files and directories that make up the table. + </p> + <p> + For an external table, dropping the table only involves changes to metadata in the metastore database. + Because Impala does not remove any HDFS files or directories when external tables are dropped, + no particular permissions are needed for the associated HDFS files or directories. 
+ </p> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_tables.xml#tables"/>, + <xref href="impala_alter_table.xml#alter_table"/>, <xref href="impala_create_table.xml#create_table"/>, + <xref href="impala_partitioning.xml#partitioning"/>, <xref href="impala_tables.xml#internal_tables"/>, + <xref href="impala_tables.xml#external_tables"/> + </p> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_drop_view.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_drop_view.xml b/docs/topics/impala_drop_view.xml new file mode 100644 index 0000000..edcab58 --- /dev/null +++ b/docs/topics/impala_drop_view.xml @@ -0,0 +1,48 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="1.1" id="drop_view"> + + <title>DROP VIEW Statement</title> + <titlealts><navtitle>DROP VIEW</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Schemas"/> + <data name="Category" value="Tables"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">DROP VIEW statement</indexterm> + Removes the specified view, which was originally created by the <codeph>CREATE VIEW</codeph> statement. + Because a view is purely a logical construct (an alias for a query) with no physical data behind it, + <codeph>DROP VIEW</codeph> only involves changes to metadata in the metastore database, not any data files in + HDFS. 
+ </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>DROP VIEW [IF EXISTS] [<varname>db_name</varname>.]<varname>view_name</varname></codeblock> + + <p conref="../shared/impala_common.xml#common/ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p conref="../shared/impala_common.xml#common/create_drop_view_examples"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_views.xml#views"/>, <xref href="impala_create_view.xml#create_view"/>, + <xref href="impala_alter_view.xml#alter_view"/> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_exec_single_node_rows_threshold.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_exec_single_node_rows_threshold.xml b/docs/topics/impala_exec_single_node_rows_threshold.xml new file mode 100644 index 0000000..fa3007d --- /dev/null +++ b/docs/topics/impala_exec_single_node_rows_threshold.xml @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="2.0.0" id="exec_single_node_rows_threshold" xml:lang="en-US"> + + <title>EXEC_SINGLE_NODE_ROWS_THRESHOLD Query Option</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Scalability"/> + <data name="Category" value="Performance"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">EXEC_SINGLE_NODE_ROWS_THRESHOLD query option</indexterm> + This setting controls the cutoff point (in terms of number of rows scanned) below which Impala treats a query + as a <q>small</q> query, turning off 
optimizations such as parallel execution and native code generation. The + overhead for these optimizations is applicable for queries involving substantial amounts of data, but it + makes sense to skip them for queries involving tiny amounts of data. Reducing the overhead for small queries + allows Impala to complete them more quickly, keeping YARN resources, admission control slots, and so on + available for data-intensive queries. + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>SET EXEC_SINGLE_NODE_ROWS_THRESHOLD=<varname>number_of_rows</varname></codeblock> + + <p> + <b>Type:</b> numeric + </p> + + <p> + <b>Default:</b> 100 + </p> + + <p> + <b>Usage notes:</b> Typically, you increase the default value to make this optimization apply to more queries. + If incorrect or corrupted table and column statistics cause Impala to apply this optimization + incorrectly to queries that actually involve substantial work, you might see the queries being slower as a + result of remote reads. In that case, recompute statistics with the <codeph>COMPUTE STATS</codeph> + or <codeph>COMPUTE INCREMENTAL STATS</codeph> statement. If there is a problem collecting accurate + statistics, you can turn this feature off by setting the value to -1. + </p> + + <p conref="../shared/impala_common.xml#common/internals_blurb"/> + + <p> + This setting applies to query fragments where the amount of data to scan can be accurately determined, either + through table and column statistics, or by the presence of a <codeph>LIMIT</codeph> clause. If Impala cannot + accurately estimate the size of the input data, this setting does not apply. 
+ </p> + + <p rev="2.3.0"> + In CDH 5.5 / Impala 2.3 and higher, where Impala supports the complex data types <codeph>STRUCT</codeph>, + <codeph>ARRAY</codeph>, and <codeph>MAP</codeph>, if a query refers to any column of those types, + the small-query optimization is turned off for that query regardless of the + <codeph>EXEC_SINGLE_NODE_ROWS_THRESHOLD</codeph> setting. + </p> + + <p> + For a query that is determined to be <q>small</q>, all work is performed on the coordinator node. This might + result in some I/O being performed by remote reads. The savings from not distributing the query work and not + generating native code are expected to outweigh any overhead from the remote reads. + </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p> + A common use case is to query just a few rows from a table to inspect typical data values. In this example, + Impala does not parallelize the query or perform native code generation because the result set is guaranteed + to be smaller than the threshold value from this query option: + </p> + +<codeblock>SET EXEC_SINGLE_NODE_ROWS_THRESHOLD=500; +SELECT * FROM enormous_table LIMIT 300; +</codeblock> + +<!-- Don't have any other places that tie into this particular optimization technique yet. 
+Potentially: conceptual topics about code generation, distributed queries + +<p conref="/Content/impala_common_xi44078.xml#common/related_info"/> +<p> +</p> +--> + + </conbody> + +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_explain.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_explain.xml b/docs/topics/impala_explain.xml new file mode 100644 index 0000000..c9e8846 --- /dev/null +++ b/docs/topics/impala_explain.xml @@ -0,0 +1,224 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="explain"> + + <title>EXPLAIN Statement</title> + <titlealts><navtitle>EXPLAIN</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Reports"/> + <data name="Category" value="Planning"/> + <data name="Category" value="Performance"/> + <data name="Category" value="Troubleshooting"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">EXPLAIN statement</indexterm> + Returns the execution plan for a statement, showing the low-level mechanisms that Impala will use to read the + data, divide the work among nodes in the cluster, and transmit intermediate and final results across the + network. Use <codeph>explain</codeph> followed by a complete <codeph>SELECT</codeph> query. For example: + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>EXPLAIN { <varname>select_query</varname> | <varname>ctas_stmt</varname> | <varname>insert_stmt</varname> } +</codeblock> + + <p> + The <varname>select_query</varname> is a <codeph>SELECT</codeph> statement, optionally prefixed by a + <codeph>WITH</codeph> clause. See <xref href="impala_select.xml#select"/> for details. 
+ </p> + + <p> + The <varname>insert_stmt</varname> is an <codeph>INSERT</codeph> statement that inserts into or overwrites an + existing table. It can use either the <codeph>INSERT ... SELECT</codeph> or <codeph>INSERT ... + VALUES</codeph> syntax. See <xref href="impala_insert.xml#insert"/> for details. + </p> + + <p> + The <varname>ctas_stmt</varname> is a <codeph>CREATE TABLE</codeph> statement using the <codeph>AS + SELECT</codeph> clause, typically abbreviated as a <q>CTAS</q> operation. See + <xref href="impala_create_table.xml#create_table"/> for details. + </p> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + You can interpret the output to judge whether the query is performing efficiently, and adjust the query + and/or the schema if not. For example, you might change the tests in the <codeph>WHERE</codeph> clause, add + hints to make join operations more efficient, introduce subqueries, change the order of tables in a join, add + or change partitioning for a table, collect column statistics and/or table statistics in Hive, or any other + performance tuning steps. + </p> + + <p> + The <codeph>EXPLAIN</codeph> output reminds you if table or column statistics are missing from any table + involved in the query. These statistics are important for optimizing queries involving large tables or + multi-table joins. See <xref href="impala_compute_stats.xml#compute_stats"/> for how to gather statistics, + and <xref href="impala_perf_stats.xml#perf_stats"/> for how to use this information for query tuning. + </p> + + <p conref="../shared/impala_common.xml#common/explain_interpret"/> + + <p> + If you come from a traditional database background and are not familiar with data warehousing, keep in mind + that Impala is optimized for full table scans across very large tables. The structure and distribution of + this data is typically not suitable for the kind of indexing and single-row lookups that are common in OLTP + environments. 
Seeing a query scan entirely through a large table is common, not necessarily an indication of + an inefficient query. Of course, if you can reduce the volume of scanned data by orders of magnitude, for + example by using a query that affects only certain partitions within a partitioned table, then you might be + able to optimize a query so that it executes in seconds rather than minutes. + </p> + + <p> + For more information and examples to help you interpret <codeph>EXPLAIN</codeph> output, see + <xref href="impala_explain_plan.xml#perf_explain"/>. + </p> + + <p rev="1.2"> + <b>Extended EXPLAIN output:</b> + </p> + + <p rev="1.2"> + For performance tuning of complex queries, and capacity planning (such as using the admission control and + resource management features), you can enable more detailed and informative output for the + <codeph>EXPLAIN</codeph> statement. In the <cmdname>impala-shell</cmdname> interpreter, issue the command + <codeph>SET EXPLAIN_LEVEL=<varname>level</varname></codeph>, where <varname>level</varname> is an integer + from 0 to 3 or corresponding mnemonic values <codeph>minimal</codeph>, <codeph>standard</codeph>, + <codeph>extended</codeph>, or <codeph>verbose</codeph>. + </p> + + <p rev="1.2"> + When extended <codeph>EXPLAIN</codeph> output is enabled, <codeph>EXPLAIN</codeph> statements print + information about estimated memory requirements, minimum number of virtual cores, and so on that you can use + to fine-tune the resource management options explained in + <xref href="impala_resource_management.xml#rm_options"/>. (The estimated memory requirements are + intentionally on the high side, to allow a margin for error, to avoid cancelling a query unnecessarily if you + set the <codeph>MEM_LIMIT</codeph> option to the estimated memory figure.) + </p> + + <p> + See <xref href="impala_explain_level.xml#explain_level"/> for details and examples. 
+ </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p> + This example shows how the standard <codeph>EXPLAIN</codeph> output moves from the lowest (physical) level to + the higher (logical) levels. The query begins by scanning a certain amount of data; each node performs an + aggregation operation (evaluating <codeph>COUNT(*)</codeph>) on some subset of data that is local to that + node; the intermediate results are transmitted back to the coordinator node (labelled here as the + <codeph>EXCHANGE</codeph> node); lastly, the intermediate results are summed to display the final result. + </p> + +<codeblock id="explain_plan_simple">[impalad-host:21000] > explain select count(*) from customer_address; ++----------------------------------------------------------+ +| Explain String | ++----------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=42.00MB VCores=1 | +| | +| 03:AGGREGATE [MERGE FINALIZE] | +| | output: sum(count(*)) | +| | | +| 02:EXCHANGE [PARTITION=UNPARTITIONED] | +| | | +| 01:AGGREGATE | +| | output: count(*) | +| | | +| 00:SCAN HDFS [default.customer_address] | +| partitions=1/1 size=5.25MB | ++----------------------------------------------------------+ +</codeblock> + + <p> + These examples show how the extended <codeph>EXPLAIN</codeph> output becomes more accurate and informative as + statistics are gathered by the <codeph>COMPUTE STATS</codeph> statement. Initially, much of the information + about data size and distribution is marked <q>unavailable</q>. Impala can determine the raw data size, but + not the number of rows or number of distinct values for each column without additional analysis. The + <codeph>COMPUTE STATS</codeph> statement performs this analysis, so a subsequent <codeph>EXPLAIN</codeph> + statement has additional information to use in deciding how to optimize the distributed query. 
+ </p> + + <draft-comment translate="no"> +Re-run these examples with more substantial tables populated with data. +</draft-comment> + +<codeblock rev="1.2">[localhost:21000] > set explain_level=extended; +EXPLAIN_LEVEL set to extended +[localhost:21000] > explain select x from t1; ++----------------------------------------------------------+ +| Explain String | ++----------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=32.00MB VCores=1 | +| | +| 01:EXCHANGE [PARTITION=UNPARTITIONED] | +| | hosts=1 per-host-mem=unavailable | +<b>| | tuple-ids=0 row-size=4B cardinality=unavailable |</b> +| | | +| 00:SCAN HDFS [default.t1, PARTITION=RANDOM] | +| partitions=1/1 size=36B | +<b>| table stats: unavailable |</b> +<b>| column stats: unavailable |</b> +| hosts=1 per-host-mem=32.00MB | +<b>| tuple-ids=0 row-size=4B cardinality=unavailable |</b> ++----------------------------------------------------------+ +</codeblock> + +<codeblock rev="1.2">[localhost:21000] > compute stats t1; ++-----------------------------------------+ +| summary | ++-----------------------------------------+ +| Updated 1 partition(s) and 1 column(s). 
| ++-----------------------------------------+ +[localhost:21000] > explain select x from t1; ++----------------------------------------------------------+ +| Explain String | ++----------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=64.00MB VCores=1 | +| | +| 01:EXCHANGE [PARTITION=UNPARTITIONED] | +| | hosts=1 per-host-mem=unavailable | +| | tuple-ids=0 row-size=4B cardinality=0 | +| | | +| 00:SCAN HDFS [default.t1, PARTITION=RANDOM] | +| partitions=1/1 size=36B | +<b>| table stats: 0 rows total |</b> +<b>| column stats: all |</b> +| hosts=1 per-host-mem=64.00MB | +<b>| tuple-ids=0 row-size=4B cardinality=0 |</b> ++----------------------------------------------------------+ +</codeblock> + + <p conref="../shared/impala_common.xml#common/security_blurb"/> + <p conref="../shared/impala_common.xml#common/redaction_yes"/> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb"/> + <p rev="CDH-19187"> + <!-- Doublecheck these details. Does EXPLAIN really need any permissions? --> + The user ID that the <cmdname>impalad</cmdname> daemon runs under, + typically the <codeph>impala</codeph> user, must have read + and execute permissions for all applicable directories in all source tables + for the query that is being explained. + (A <codeph>SELECT</codeph> operation could read files from multiple different HDFS directories + if the source table is partitioned.) 
+ </p> + + <p conref="../shared/impala_common.xml#common/related_info"/> + <p> + <xref href="impala_select.xml#select"/>, + <xref href="impala_insert.xml#insert"/>, + <xref href="impala_create_table.xml#create_table"/>, + <xref href="impala_explain_plan.xml#explain_plan"/> + </p> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_explain_level.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_explain_level.xml b/docs/topics/impala_explain_level.xml new file mode 100644 index 0000000..f54e8a8 --- /dev/null +++ b/docs/topics/impala_explain_level.xml @@ -0,0 +1,338 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="1.2" id="explain_level"> + + <title>EXPLAIN_LEVEL Query Option</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Troubleshooting"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Performance"/> + <data name="Category" value="Reports"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">EXPLAIN_LEVEL query option</indexterm> + Controls the amount of detail provided in the output of the <codeph>EXPLAIN</codeph> statement. The basic + output can help you identify high-level performance issues such as scanning a higher volume of data or more + partitions than you expect. The higher levels of detail show how intermediate results flow between nodes and + how different SQL operations such as <codeph>ORDER BY</codeph>, <codeph>GROUP BY</codeph>, joins, and + <codeph>WHERE</codeph> clauses are implemented within a distributed query. 
+ </p> + + <p> + <b>Type:</b> <codeph>STRING</codeph> or <codeph>INT</codeph> + </p> + + <p> + <b>Default:</b> <codeph>1</codeph> + </p> + + <p> + <b>Arguments:</b> + </p> + + <p> + The allowed range of numeric values for this option is 0 to 3: + </p> + + <ul> + <li> + <codeph>0</codeph> or <codeph>MINIMAL</codeph>: A barebones list, one line per operation. Primarily useful + for checking the join order in very long queries where the regular <codeph>EXPLAIN</codeph> output is too + long to read easily. + </li> + + <li> + <codeph>1</codeph> or <codeph>STANDARD</codeph>: The default level of detail, showing the logical way that + work is split up for the distributed query. + </li> + + <li> + <codeph>2</codeph> or <codeph>EXTENDED</codeph>: Includes additional detail about how the query planner + uses statistics in its decision-making process, to understand how a query could be tuned by gathering + statistics, using query hints, adding or removing predicates, and so on. + </li> + + <li> + <codeph>3</codeph> or <codeph>VERBOSE</codeph>: The maximum level of detail, showing how work is split up + within each node into <q>query fragments</q> that are connected in a pipeline. This extra detail is + primarily useful for low-level performance testing and tuning within Impala itself, rather than for + rewriting the SQL code at the user level. + </li> + </ul> + + <note> + Prior to Impala 1.3, the allowed argument range for <codeph>EXPLAIN_LEVEL</codeph> was 0 to 1: level 0 had + the mnemonic <codeph>NORMAL</codeph>, and level 1 was <codeph>VERBOSE</codeph>. In Impala 1.3 and higher, + <codeph>NORMAL</codeph> is not a valid mnemonic value, and <codeph>VERBOSE</codeph> still applies to the + highest level of detail but now corresponds to level 3. You might need to adjust the values if you have any + older <codeph>impala-shell</codeph> script files that set the <codeph>EXPLAIN_LEVEL</codeph> query option. 
+ </note> + + <p> + Changing the value of this option controls the amount of detail in the output of the <codeph>EXPLAIN</codeph> + statement. The extended information from level 2 or 3 is especially useful during performance tuning, when + you need to confirm whether the work for the query is distributed the way you expect, particularly for the + most resource-intensive operations such as join queries against large tables, queries against tables with + large numbers of partitions, and insert operations for Parquet tables. The extended information also helps to + check estimated resource usage when you use the admission control or resource management features explained + in <xref href="impala_resource_management.xml#resource_management"/>. See + <xref href="impala_explain.xml#explain"/> for the syntax of the <codeph>EXPLAIN</codeph> statement, and + <xref href="impala_explain_plan.xml#perf_explain"/> for details about how to use the extended information. + </p> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + As always, read the <codeph>EXPLAIN</codeph> output from bottom to top. The lowest lines represent the + initial work of the query (scanning data files), the lines in the middle represent calculations done on each + node and how intermediate results are transmitted from one node to another, and the topmost lines represent + the final results being sent back to the coordinator node. + </p> + + <p> + The numbers in the left column are generated internally during the initial planning phase and do not + represent the actual order of operations, so it is not significant if they appear out of order in the + <codeph>EXPLAIN</codeph> output. + </p> + + <p> + At all <codeph>EXPLAIN</codeph> levels, the plan contains a warning if any tables in the query are missing + statistics. Use the <codeph>COMPUTE STATS</codeph> statement to gather statistics for each table and suppress + this warning. 
See <xref href="impala_perf_stats.xml#perf_stats"/> for details about how the statistics help + query performance. + </p> + + <p> + The <codeph>PROFILE</codeph> command in <cmdname>impala-shell</cmdname> always starts with an explain plan + showing full detail, the same as with <codeph>EXPLAIN_LEVEL=3</codeph>. <ph rev="1.4.0">After the explain + plan comes the executive summary, the same output as produced by the <codeph>SUMMARY</codeph> command in + <cmdname>impala-shell</cmdname>.</ph> + </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p> + These examples use a trivial, empty table to illustrate how the essential aspects of query planning are shown + in <codeph>EXPLAIN</codeph> output: + </p> + +<codeblock>[localhost:21000] > create table t1 (x int, s string); +[localhost:21000] > set explain_level=1; +[localhost:21000] > explain select count(*) from t1; ++------------------------------------------------------------------------------------+ +| Explain String | ++------------------------------------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=10.00MB VCores=1 | +| WARNING: The following tables are missing relevant table and/or column statistics. | +| explain_plan.t1 | +| | +| 03:AGGREGATE [MERGE FINALIZE] | +| | output: sum(count(*)) | +| | | +| 02:EXCHANGE [PARTITION=UNPARTITIONED] | +| | | +| 01:AGGREGATE | +| | output: count(*) | +| | | +| 00:SCAN HDFS [explain_plan.t1] | +| partitions=1/1 size=0B | ++------------------------------------------------------------------------------------+ +[localhost:21000] > explain select * from t1; ++------------------------------------------------------------------------------------+ +| Explain String | ++------------------------------------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 | +| WARNING: The following tables are missing relevant table and/or column statistics. 
| +| explain_plan.t1 | +| | +| 01:EXCHANGE [PARTITION=UNPARTITIONED] | +| | | +| 00:SCAN HDFS [explain_plan.t1] | +| partitions=1/1 size=0B | ++------------------------------------------------------------------------------------+ +[localhost:21000] > set explain_level=2; +[localhost:21000] > explain select * from t1; ++------------------------------------------------------------------------------------+ +| Explain String | ++------------------------------------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 | +| WARNING: The following tables are missing relevant table and/or column statistics. | +| explain_plan.t1 | +| | +| 01:EXCHANGE [PARTITION=UNPARTITIONED] | +| | hosts=0 per-host-mem=unavailable | +| | tuple-ids=0 row-size=19B cardinality=unavailable | +| | | +| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM] | +| partitions=1/1 size=0B | +| table stats: unavailable | +| column stats: unavailable | +| hosts=0 per-host-mem=0B | +| tuple-ids=0 row-size=19B cardinality=unavailable | ++------------------------------------------------------------------------------------+ +[localhost:21000] > set explain_level=3; +[localhost:21000] > explain select * from t1; ++------------------------------------------------------------------------------------+ +| Explain String | ++------------------------------------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 | +<b>| WARNING: The following tables are missing relevant table and/or column statistics. 
|</b> +<b>| explain_plan.t1 |</b> +| | +| F01:PLAN FRAGMENT [PARTITION=UNPARTITIONED] | +| 01:EXCHANGE [PARTITION=UNPARTITIONED] | +| hosts=0 per-host-mem=unavailable | +| tuple-ids=0 row-size=19B cardinality=unavailable | +| | +| F00:PLAN FRAGMENT [PARTITION=RANDOM] | +| DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, PARTITION=UNPARTITIONED] | +| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM] | +| partitions=1/1 size=0B | +<b>| table stats: unavailable |</b> +<b>| column stats: unavailable |</b> +| hosts=0 per-host-mem=0B | +| tuple-ids=0 row-size=19B cardinality=unavailable | ++------------------------------------------------------------------------------------+ +</codeblock> + + <p> + As the warning message demonstrates, most of the information needed for Impala to do efficient query + planning, and for you to understand the performance characteristics of the query, requires running the + <codeph>COMPUTE STATS</codeph> statement for the table: + </p> + +<codeblock>[localhost:21000] > compute stats t1; ++-----------------------------------------+ +| summary | ++-----------------------------------------+ +| Updated 1 partition(s) and 2 column(s). 
| ++-----------------------------------------+ +[localhost:21000] > explain select * from t1; ++------------------------------------------------------------------------+ +| Explain String | ++------------------------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 | +| | +| F01:PLAN FRAGMENT [PARTITION=UNPARTITIONED] | +| 01:EXCHANGE [PARTITION=UNPARTITIONED] | +| hosts=0 per-host-mem=unavailable | +| tuple-ids=0 row-size=20B cardinality=0 | +| | +| F00:PLAN FRAGMENT [PARTITION=RANDOM] | +| DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, PARTITION=UNPARTITIONED] | +| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM] | +| partitions=1/1 size=0B | +<b>| table stats: 0 rows total |</b> +<b>| column stats: all |</b> +| hosts=0 per-host-mem=0B | +| tuple-ids=0 row-size=20B cardinality=0 | ++------------------------------------------------------------------------+ +</codeblock> + + <p> + Joins and other complicated, multi-part queries are the ones where you most commonly need to examine the + <codeph>EXPLAIN</codeph> output and customize the amount of detail in the output. This example shows the + default <codeph>EXPLAIN</codeph> output for a three-way join query, then the equivalent output with a + <codeph>[SHUFFLE]</codeph> hint to change the join mechanism between the first two tables from a broadcast + join to a shuffle join. 
+ </p> + +<codeblock>[localhost:21000] > set explain_level=1; +[localhost:21000] > explain select one.*, two.*, three.* from t1 one, t1 two, t1 three where one.x = two.x and two.x = three.x; ++------------------------------------------------------------------------------------+ +| Explain String | ++------------------------------------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 | +| | +| 07:EXCHANGE [PARTITION=UNPARTITIONED] | +| | | +<b>| 04:HASH JOIN [INNER JOIN, BROADCAST] |</b> +| | hash predicates: two.x = three.x | +| | | +<b>| |--06:EXCHANGE [BROADCAST] |</b> +| | | | +| | 02:SCAN HDFS [explain_plan.t1 three] | +| | partitions=1/1 size=0B | +| | | +<b>| 03:HASH JOIN [INNER JOIN, BROADCAST] |</b> +| | hash predicates: one.x = two.x | +| | | +<b>| |--05:EXCHANGE [BROADCAST] |</b> +| | | | +| | 01:SCAN HDFS [explain_plan.t1 two] | +| | partitions=1/1 size=0B | +| | | +| 00:SCAN HDFS [explain_plan.t1 one] | +| partitions=1/1 size=0B | ++------------------------------------------------------------------------------------+ +[localhost:21000] > explain select one.*, two.*, three.* from t1 one join [shuffle] t1 two join t1 three where one.x = two.x and two.x = three.x; ++------------------------------------------------------------------------------------+ +| Explain String | ++------------------------------------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 | +| | +| 08:EXCHANGE [PARTITION=UNPARTITIONED] | +| | | +<b>| 04:HASH JOIN [INNER JOIN, BROADCAST] |</b> +| | hash predicates: two.x = three.x | +| | | +<b>| |--07:EXCHANGE [BROADCAST] |</b> +| | | | +| | 02:SCAN HDFS [explain_plan.t1 three] | +| | partitions=1/1 size=0B | +| | | +<b>| 03:HASH JOIN [INNER JOIN, PARTITIONED] |</b> +| | hash predicates: one.x = two.x | +| | | +<b>| |--06:EXCHANGE [PARTITION=HASH(two.x)] |</b> +| | | | +| | 01:SCAN HDFS [explain_plan.t1 two] | +| | 
partitions=1/1 size=0B | +| | | +<b>| 05:EXCHANGE [PARTITION=HASH(one.x)] |</b> +| | | +| 00:SCAN HDFS [explain_plan.t1 one] | +| partitions=1/1 size=0B | ++------------------------------------------------------------------------------------+ +</codeblock> + + <p> + For a join involving many different tables, the default <codeph>EXPLAIN</codeph> output might stretch over + several pages, and the only details you care about might be the join order and the mechanism (broadcast or + shuffle) for joining each pair of tables. In that case, you might set <codeph>EXPLAIN_LEVEL</codeph> to its + lowest value of 0, to focus on just the join order and join mechanism for each stage. The following example + shows how the rows from the first and second joined tables are hashed and divided among the nodes of the + cluster for further filtering; then the entire contents of the third table are broadcast to all nodes for the + final stage of join processing. + </p> + +<codeblock>[localhost:21000] > set explain_level=0; +[localhost:21000] > explain select one.*, two.*, three.* from t1 one join [shuffle] t1 two join t1 three where one.x = two.x and two.x = three.x; ++---------------------------------------------------------+ +| Explain String | ++---------------------------------------------------------+ +| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 | +| | +| 08:EXCHANGE [PARTITION=UNPARTITIONED] | +<b>| 04:HASH JOIN [INNER JOIN, BROADCAST] |</b> +<b>| |--07:EXCHANGE [BROADCAST] |</b> +| | 02:SCAN HDFS [explain_plan.t1 three] | +<b>| 03:HASH JOIN [INNER JOIN, PARTITIONED] |</b> +<b>| |--06:EXCHANGE [PARTITION=HASH(two.x)] |</b> +| | 01:SCAN HDFS [explain_plan.t1 two] | +<b>| 05:EXCHANGE [PARTITION=HASH(one.x)] |</b> +| 00:SCAN HDFS [explain_plan.t1 one] | ++---------------------------------------------------------+ +</codeblock> + +<!-- Consider adding a related info section to collect the xrefs earlier on this page. 
--> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_float.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_float.xml b/docs/topics/impala_float.xml new file mode 100644 index 0000000..51e3311 --- /dev/null +++ b/docs/topics/impala_float.xml @@ -0,0 +1,94 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="float"> + + <title>FLOAT Data Type</title> + <titlealts><navtitle>FLOAT</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Data Types"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + A single precision floating-point data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER + TABLE</codeph> statements. + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + + <p> + In the column definition of a <codeph>CREATE TABLE</codeph> statement: + </p> + +<codeblock><varname>column_name</varname> FLOAT</codeblock> + + <p> + <b>Range:</b> 1.40129846432481707e-45 .. 3.40282346638528860e+38, positive or negative + </p> + + <p> + <b>Precision:</b> 6 to 9 significant digits, depending on usage. The number of significant digits does + not depend on the position of the decimal point. + </p> + + <p> + <b>Representation:</b> The values are stored in 4 bytes, using + <xref href="https://en.wikipedia.org/wiki/Single-precision_floating-point_format" scope="external" format="html">IEEE 754 Single Precision Binary Floating Point</xref> format. + </p> + + <p> + <b>Conversions:</b> Impala automatically converts <codeph>FLOAT</codeph> to more precise + <codeph>DOUBLE</codeph> values, but not the other way around. 
You can use <codeph>CAST()</codeph> to convert + <codeph>FLOAT</codeph> values to <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, <codeph>INT</codeph>, + <codeph>BIGINT</codeph>, <codeph>STRING</codeph>, <codeph>TIMESTAMP</codeph>, or <codeph>BOOLEAN</codeph>. + You can use exponential notation in <codeph>FLOAT</codeph> literals or when casting from + <codeph>STRING</codeph>, for example <codeph>1.0e6</codeph> to represent one million. + <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/> + </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + +<codeblock>CREATE TABLE t1 (x FLOAT); +SELECT CAST(1000.5 AS FLOAT); +</codeblock> + + <p conref="../shared/impala_common.xml#common/partitioning_imprecise"/> + + <p conref="../shared/impala_common.xml#common/hbase_ok"/> + + <p conref="../shared/impala_common.xml#common/parquet_ok"/> + + <p conref="../shared/impala_common.xml#common/text_bulky"/> + +<!-- <p conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> --> + + <p conref="../shared/impala_common.xml#common/internals_4_bytes"/> + +<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> --> + + <p conref="../shared/impala_common.xml#common/column_stats_constant"/> + + <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> + +<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. --> + + <p conref="../shared/impala_common.xml#common/sum_double"/> + + <p conref="../shared/impala_common.xml#common/float_double_decimal_caveat"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_literals.xml#numeric_literals"/>, <xref href="impala_math_functions.xml#math_functions"/>, + <xref href="impala_double.xml#double"/> + </p> + </conbody> +</concept>
