http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_schedule_random_replica.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_schedule_random_replica.xml b/docs/topics/impala_schedule_random_replica.xml new file mode 100644 index 0000000..39092e8 --- /dev/null +++ b/docs/topics/impala_schedule_random_replica.xml @@ -0,0 +1,80 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="schedule_random_replica" rev="2.5.0"> + + <title>SCHEDULE_RANDOM_REPLICA Query Option (<keyword keyref="impala25"/> or higher only)</title> + <titlealts audience="PDF"><navtitle>SCHEDULE_RANDOM_REPLICA</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Performance"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p rev="2.5.0"> + <indexterm audience="Cloudera">SCHEDULE_RANDOM_REPLICA query option</indexterm> + </p> + + <p> + The <codeph>SCHEDULE_RANDOM_REPLICA</codeph> query option fine-tunes the algorithm for deciding which host + processes each HDFS data block. It only applies to tables and partitions that are not enabled + for the HDFS caching feature. + </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + + <p conref="../shared/impala_common.xml#common/default_false"/> + + <p conref="../shared/impala_common.xml#common/added_in_250"/> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + In the presence of HDFS cached replicas, Impala randomizes + which host processes each cached data block. 
+ To ensure that HDFS data blocks are cached on more + than one host, use the <codeph>WITH REPLICATION</codeph> clause along with + the <codeph>CACHED IN</codeph> clause in a + <codeph>CREATE TABLE</codeph> or <codeph>ALTER TABLE</codeph> statement. + Specify a replication value greater than or equal to the HDFS block replication factor. + </p> + + <p> + The <codeph>SCHEDULE_RANDOM_REPLICA</codeph> query option applies to tables and partitions + that <i>do not</i> use HDFS caching. + By default, Impala estimates how much work each host has done for + the query, and selects the host that has the lowest workload. + This algorithm is intended to reduce CPU hotspots arising when the + same host is selected to process multiple data blocks, but hotspots + might still arise for some combinations of queries and data layout. + When the <codeph>SCHEDULE_RANDOM_REPLICA</codeph> option is enabled, + Impala also randomizes the scheduling algorithm for non-HDFS cached blocks, + which can further reduce the chance of CPU hotspots. + </p> + + <p rev="CDH-43739 IMPALA-2979"> + This query option works in conjunction with the work scheduling improvements + in <keyword keyref="impala25_full"/> and higher. The scheduling improvements + distribute the processing for cached HDFS data blocks to minimize hotspots: + if a data block is cached on more than one host, Impala chooses which host + to process each block based on which host has read the fewest bytes during + the current query. Enable the <codeph>SCHEDULE_RANDOM_REPLICA</codeph> setting if CPU hotspots + still persist because of cases where hosts are <q>tied</q> in terms of + the amount of work done; by default, Impala picks the first eligible host + in this case.
+ </p> + + <p conref="../shared/impala_common.xml#common/related_info"/> + <p> + <xref href="impala_perf_hdfs_caching.xml#hdfs_caching"/>, + <xref href="impala_scalability.xml#scalability_hotspots"/> + , <xref href="impala_replica_preference.xml#replica_preference"/> + </p> + + </conbody> +</concept>
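Putting the statements named above together, a hedged Impala SQL sketch of how this option might be used (the cache pool and table names are hypothetical, and the replication value assumes an HDFS replication factor of 3):

```sql
-- Cache a table's data blocks on multiple hosts so the scheduler can
-- spread cached reads (pool and table names are illustrative only).
ALTER TABLE census SET CACHED IN 'four_gig_pool' WITH REPLICATION = 3;

-- For tables that are not HDFS-cached, randomize replica selection
-- for the current session to break scheduling ties:
SET SCHEDULE_RANDOM_REPLICA=true;
SELECT COUNT(*) FROM uncached_table;
```

The `SET` statement applies only to the current session; testing with and without the option on a representative workload is the practical way to confirm whether hotspots are reduced.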
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_schema_design.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_schema_design.xml b/docs/topics/impala_schema_design.xml new file mode 100644 index 0000000..4d08de5 --- /dev/null +++ b/docs/topics/impala_schema_design.xml @@ -0,0 +1,222 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="schema_design"> + + <title>Guidelines for Designing Impala Schemas</title> + <titlealts audience="PDF"><navtitle>Designing Schemas</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Planning"/> + <data name="Category" value="Sectionated Pages"/> + <data name="Category" value="Proof of Concept"/> + <data name="Category" value="Checklists"/> + <data name="Category" value="Guidelines"/> + <data name="Category" value="Best Practices"/> + <data name="Category" value="Performance"/> + <data name="Category" value="Compression"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Schemas"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Porting"/> + <data name="Category" value="Administrators"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + The guidelines in this topic help you to construct an optimized and scalable schema, one that integrates well + with your existing data management processes. Use these guidelines as a checklist during any + proof-of-concept work or porting exercise, and before deploying to production.
+ </p> + + <p> + If you are adapting an existing database or Hive schema for use with Impala, read the guidelines in this + section and then see <xref href="impala_porting.xml#porting"/> for specific porting and compatibility tips. + </p> + + <p outputclass="toc inpage"/> + + <section id="schema_design_text_vs_binary"> + + <title>Prefer binary file formats over text-based formats.</title> + + <p> + To save space and improve memory usage and query performance, use binary file formats for any large or + intensively queried tables. The Parquet file format is the most efficient for data warehouse-style analytic + queries. Avro is the other binary file format that Impala supports; you might already have Avro data as part of + a Hadoop ETL pipeline. + </p> + + <p> + Although Impala can create and query tables with the RCFile and SequenceFile file formats, such tables are + relatively bulky due to the text-based nature of those formats, and are not optimized for data + warehouse-style queries due to their row-oriented layout. Impala does not support <codeph>INSERT</codeph> + operations for tables with these file formats. + </p> + + <p> + Guidelines: + </p> + + <ul> + <li> + For an efficient and scalable format for large, performance-critical tables, use the Parquet file format. + </li> + + <li> + To deliver intermediate data during the ETL process, in a format that can also be used by other Hadoop + components, Avro is a reasonable choice. + </li> + + <li> + For convenient import of raw data, use a text table instead of RCFile or SequenceFile, and convert to + Parquet in a later stage of the ETL process. + </li> + </ul> + </section> + + <section id="schema_design_compression"> + + <title>Use Snappy compression where practical.</title> + + <p> + Snappy compression involves low CPU overhead to decompress, while still providing substantial space + savings.
In cases where you have a choice of compression codecs, such as with the Parquet and Avro file + formats, use Snappy compression unless you find a compelling reason to use a different codec. + </p> + </section> + + <section id="schema_design_numeric_types"> + + <title>Prefer numeric types over strings.</title> + + <p> + If you have numeric values that you could treat as either strings or numbers (such as + <codeph>YEAR</codeph>, <codeph>MONTH</codeph>, and <codeph>DAY</codeph> for partition key columns), define + them as the smallest applicable integer types. For example, <codeph>YEAR</codeph> can be + <codeph>SMALLINT</codeph>, <codeph>MONTH</codeph> and <codeph>DAY</codeph> can be <codeph>TINYINT</codeph>. + Although you might not see any difference in the way partitioned tables or text files are laid out on disk, + using numeric types will save space in binary formats such as Parquet, and in memory when doing queries, + particularly resource-intensive queries such as joins. + </p> + </section> + +<!-- Alan suggests not making this recommendation. +<section id="schema_design_decimal"> +<title>Prefer DECIMAL types over FLOAT and DOUBLE.</title> +<p> +</p> +</section> +--> + + <section id="schema_design_partitioning"> + + <title>Partition, but do not over-partition.</title> + + <p> + Partitioning is an important aspect of performance tuning for Impala. Follow the procedures in + <xref href="impala_partitioning.xml#partitioning"/> to set up partitioning for your biggest, most + intensively queried tables. + </p> + + <p> + If you are moving to Impala from a traditional database system, or just getting started in the Big Data + field, you might not have enough data volume to take advantage of Impala parallel queries with your + existing partitioning scheme. For example, if you have only a few tens of megabytes of data per day, + partitioning by <codeph>YEAR</codeph>, <codeph>MONTH</codeph>, and <codeph>DAY</codeph> columns might be + too granular. 
+ Most of your cluster might be sitting idle during queries that target a single day, or each + node might have very little work to do. Consider reducing the number of partition key columns so that each + partition directory contains several gigabytes' worth of data. + </p> + + <p rev="parquet_block_size"> + For example, consider a Parquet table where each data file is 1 HDFS block, with a maximum block size of 1 + GB. (In Impala 2.0 and later, the default Parquet block size is reduced to 256 MB. For this exercise, let's + assume you have bumped the size back up to 1 GB by setting the query option + <codeph>PARQUET_FILE_SIZE=1g</codeph>.) If you have a 10-node cluster, you need 10 data files (up to 10 GB) + to give each node some work to do for a query. But each core on each machine can process a separate data + block in parallel. With 16-core machines on a 10-node cluster, a query could process up to 160 GB fully in + parallel. If there are only a few data files per partition, not only are most cluster nodes sitting idle + during queries, so are most cores on those machines. + </p> + + <p> + You can reduce the Parquet block size to as low as 128 MB or 64 MB to increase the number of files per + partition and improve parallelism. But also consider reducing the level of partitioning so that analytic + queries have enough data to work with. + </p> + </section> + + <section id="schema_design_compute_stats"> + + <title>Always compute stats after loading data.</title> + + <p> + Impala makes extensive use of statistics about data in the overall table and in each column, to help plan + resource-intensive operations such as join queries and inserting into partitioned Parquet tables. Because + this information is only available after data is loaded, run the <codeph>COMPUTE STATS</codeph> statement + on a table after loading or replacing data in a table or partition.
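For example, a minimal sketch of this workflow in Impala SQL (the table name and staging path are hypothetical):

```sql
-- Refresh statistics every time the table's data changes substantially
-- ('sales' and the HDFS path are illustrative only).
LOAD DATA INPATH '/staging/sales_2016' INTO TABLE sales;
COMPUTE STATS sales;

-- Confirm that row counts and column statistics are populated
-- (a value of -1 indicates missing statistics):
SHOW TABLE STATS sales;
SHOW COLUMN STATS sales;
```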
+ </p> + + <p> + Having accurate statistics can make the difference between a successful operation and one that fails due to + an out-of-memory error or a timeout. When you encounter performance or capacity issues, always use the + <codeph>SHOW TABLE STATS</codeph> and <codeph>SHOW COLUMN STATS</codeph> statements to check whether the statistics are present and up-to-date for all tables + in the query. + </p> + + <p> + When doing a join query, Impala consults the statistics for each joined table to determine their relative + sizes and to estimate the number of rows produced in each join stage. When doing an <codeph>INSERT</codeph> + into a Parquet table, Impala consults the statistics for the source table to determine how to distribute + the work of constructing the data files for each partition. + </p> + + <p> + See <xref href="impala_compute_stats.xml#compute_stats"/> for the syntax of the <codeph>COMPUTE + STATS</codeph> statement, and <xref href="impala_perf_stats.xml#perf_stats"/> for all the performance + considerations for table and column statistics. + </p> + </section> + + <section id="schema_design_explain"> + + <title>Verify sensible execution plans with EXPLAIN and SUMMARY.</title> + + <p> + Before executing a resource-intensive query, use the <codeph>EXPLAIN</codeph> statement to get an overview + of how Impala intends to parallelize the query and distribute the work. If you see that the query plan is + inefficient, you can take tuning steps such as changing file formats, using partitioned tables, running the + <codeph>COMPUTE STATS</codeph> statement, or adding query hints. For information about all of these + techniques, see <xref href="impala_performance.xml#performance"/>. + </p> + + <p> + After you run a query, you can see performance-related information about how it actually ran by issuing the + <codeph>SUMMARY</codeph> command in <cmdname>impala-shell</cmdname>.
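A sketch of this check-then-run workflow in <cmdname>impala-shell</cmdname> (table and column names are hypothetical):

```sql
-- Check the plan first; look for a reasonable join order and for
-- warnings about missing statistics.
EXPLAIN SELECT c.region, SUM(s.amount)
  FROM sales s JOIN customers c ON s.cust_id = c.id
  GROUP BY c.region;

-- Run the query, then review per-stage times, row counts, and memory.
SELECT c.region, SUM(s.amount)
  FROM sales s JOIN customers c ON s.cust_id = c.id
  GROUP BY c.region;
SUMMARY;
```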
Prior to Impala 1.4, you would use + the <codeph>PROFILE</codeph> command, but its highly technical output was only useful for the most + experienced users. <codeph>SUMMARY</codeph>, new in Impala 1.4, summarizes the most useful information for + all stages of execution, for all nodes rather than splitting out figures for each node. + </p> + </section> + +<!-- +<section id="schema_design_mem_limits"> +<title>Allocate resources Between Impala and batch jobs (MapReduce, Hive, Pig).</title> +<p> +</p> +</section> + +<section id="schema_design_cm"> +<title>Use Cloudera Manager to monitor queries and overall performance.</title> +<p> +</p> +</section> +--> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_schema_objects.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_schema_objects.xml b/docs/topics/impala_schema_objects.xml new file mode 100644 index 0000000..a9ddfcc --- /dev/null +++ b/docs/topics/impala_schema_objects.xml @@ -0,0 +1,57 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="schema_objects"> + + <title>Impala Schema Objects and Object Names</title> + <titlealts audience="PDF"><navtitle>Schema Objects and Object Names</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">schema objects</indexterm> + With Impala, you work with schema objects that are familiar to database users: primarily databases, tables, views, + and functions. 
The SQL syntax to work with these objects is explained in + <xref href="impala_langref_sql.xml#langref_sql"/>. This section explains the conceptual knowledge you need to + work with these objects and the various ways to specify their names. + </p> + + <p> + Within a table, partitions can also be considered a kind of object. Partitioning is an important subject for + Impala, with its own documentation section covering use cases and performance considerations. See + <xref href="impala_partitioning.xml#partitioning"/> for details. + </p> + + <p> + Impala does not have a counterpart of the <q>tablespace</q> notion from some database systems. By default, + all the data files for a database, table, or partition are located within nested folders within the HDFS file + system. You can also specify a particular HDFS location for a given Impala table or partition. The raw data + for these objects is represented as a collection of data files, providing the flexibility to load data by + simply moving files into the expected HDFS location. + </p> + + <p> + Information about the schema objects is held in the + <xref href="impala_hadoop.xml#intro_metastore">metastore</xref> database. This database is shared between + Impala and Hive, allowing each to create, drop, and query each other's databases, tables, and so on. When + Impala makes a change to schema objects through a <codeph>CREATE</codeph>, <codeph>ALTER</codeph>, + <codeph>DROP</codeph>, <codeph>INSERT</codeph>, or <codeph>LOAD DATA</codeph> statement, it broadcasts those + changes to all nodes in the cluster through the <xref href="impala_components.xml#intro_catalogd">catalog + service</xref>. 
When you make such changes through Hive or directly through manipulating HDFS files, you use + the <xref href="impala_refresh.xml#refresh">REFRESH</xref> or + <xref href="impala_invalidate_metadata.xml#invalidate_metadata">INVALIDATE METADATA</xref> statements on the + Impala side to recognize the newly loaded data, new tables, and so on. + </p> + + <p outputclass="toc"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_security.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_security.xml b/docs/topics/impala_security.xml new file mode 100644 index 0000000..bf4606d --- /dev/null +++ b/docs/topics/impala_security.xml @@ -0,0 +1,123 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="security"> + + <title><ph audience="standalone">Impala Security</ph><ph audience="integrated">Overview of Impala Security</ph></title> + <prolog> + <metadata> + <data name="Category" value="Security"/> + <data name="Category" value="Impala"/> + <data name="Category" value="Concepts"/> + <data name="Category" value="Auditing"/> + <data name="Category" value="Governance"/> + <data name="Category" value="Authentication"/> + <data name="Category" value="Authorization"/> + <data name="Category" value="Administrators"/> + </metadata> + </prolog> + + <conbody> + + <p> + Impala includes a fine-grained authorization framework for Hadoop, based on the Sentry + open source project. Sentry authorization was added in Impala 1.1.0. Together with the Kerberos authentication + framework, Sentry takes Hadoop security to a new level needed for the requirements of highly regulated industries + such as healthcare, financial services, and government. 
Impala also includes + an auditing capability; Impala generates the audit data, the Cloudera Navigator product consolidates + the audit data from all nodes in the cluster, and Cloudera Manager lets you filter, visualize, and produce + reports. The auditing feature was added in Impala 1.1.1. + </p> + + <p> + The Impala security features have several objectives. At the most basic level, security prevents + accidents or mistakes that could disrupt application processing, delete or corrupt data, or reveal data to + unauthorized users. More advanced security features and practices can harden the system against malicious + users trying to gain unauthorized access or perform other disallowed operations. The auditing feature + provides a way to confirm that no unauthorized access occurred, and detect whether any such attempts were + made. This is a critical set of features for production deployments in large organizations that handle + important or sensitive data. It sets the stage for multi-tenancy, where multiple applications run + concurrently and are prevented from interfering with each other. + </p> + + <p> + The material in this section presumes that you are already familiar with administering secure Linux systems. + That is, you should know the general security practices for Linux and Hadoop, and their associated commands + and configuration files. For example, you should know how to create Linux users and groups, manage Linux + group membership, set Linux and HDFS file permissions and ownership, and designate the default permissions + and ownership for new files. You should be familiar with the configuration of the nodes in your Hadoop + cluster, and know how to apply configuration changes or run a set of commands across all the nodes. 
+ </p> + + <p> + The security features are divided into these broad categories: + </p> + + <dl> + <dlentry> + + <dt> + authorization + </dt> + + <dd> + Which users are allowed to access which resources, and what operations are they allowed to perform? + Impala relies on the open source Sentry project for authorization. By default (when authorization is not + enabled), Impala does all read and write operations with the privileges of the <codeph>impala</codeph> + user, which is suitable for a development/test environment but not for a secure production environment. + When authorization is enabled, Impala uses the OS user ID of the user who runs + <cmdname>impala-shell</cmdname> or another client program, and associates various privileges with each + user. See <xref href="impala_authorization.xml#authorization"/> for details about setting up and managing + authorization. + </dd> + + </dlentry> + + <dlentry> + + <dt> + authentication + </dt> + + <dd> + How does Impala verify the identity of the user to confirm that they really are allowed to exercise the + privileges assigned to that user? Impala relies on the Kerberos subsystem for authentication. See + <xref href="impala_kerberos.xml#kerberos"/> for details about setting up and managing authentication. + </dd> + + </dlentry> + + <dlentry> + + <dt> + auditing + </dt> + + <dd> + What operations were attempted, and did they succeed or not? This feature provides a way to look back and + diagnose whether attempts were made to perform unauthorized operations. You use this information to track + down suspicious activity, and to see where changes are needed in authorization policies. The audit data + produced by this feature is collected by the Cloudera Navigator product and then presented in a + user-friendly form by Cloudera Manager. See <xref href="impala_auditing.xml#auditing"/> for + details about setting up and managing auditing.
+ </dd> + + </dlentry> + </dl> + + <p outputclass="toc"/> + + <p audience="integrated"> + These other topics in the <cite>Security Guide</cite> cover how Impala integrates with security frameworks + such as Kerberos, LDAP, and Sentry: + <ul> + <li> + <xref href="impala_authentication.xml#authentication"/> + </li> + + <li> + <xref href="impala_authorization.xml#authorization"/> + </li> + </ul> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_security_files.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_security_files.xml b/docs/topics/impala_security_files.xml new file mode 100644 index 0000000..befe696 --- /dev/null +++ b/docs/topics/impala_security_files.xml @@ -0,0 +1,67 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="secure_files"> + + <title>Securing Impala Data and Log Files</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Security"/> + <data name="Category" value="Logs"/> + <data name="Category" value="HDFS"/> + <data name="Category" value="Administrators"/> + <!-- To do for John: mention redaction as a fallback to keep sensitive info out of the log files. --> + </metadata> + </prolog> + + <conbody> + + <p> + One aspect of security is to protect files from unauthorized access at the filesystem level. For example, if + you store sensitive data in HDFS, you specify permissions on the associated files and directories in HDFS to + restrict read and write permissions to the appropriate users and groups. + </p> + + <p> + If you issue queries containing sensitive values in the <codeph>WHERE</codeph> clause, such as financial + account numbers, those values are stored in Impala log files in the Linux filesystem and you must secure + those files also. 
For the locations of Impala log files, see <xref href="impala_logging.xml#logging"/>. + </p> + + <p> + All Impala read and write operations are performed under the filesystem privileges of the + <codeph>impala</codeph> user. The <codeph>impala</codeph> user must be able to read all directories and data + files that you query, and write into all the directories and data files for <codeph>INSERT</codeph> and + <codeph>LOAD DATA</codeph> statements. At a minimum, make sure the <codeph>impala</codeph> user is in the + <codeph>hive</codeph> group so that it can access files and directories shared between Impala and Hive. See + <xref href="impala_prereqs.xml#prereqs_account"/> for more details. + </p> + + <p> + Setting file permissions is necessary for Impala to function correctly, but is not an effective security + practice by itself: + </p> + + <ul> + <li> + <p> + The way to ensure that only authorized users can submit requests for databases and tables they are allowed + to access is to set up Sentry authorization, as explained in + <xref href="impala_authorization.xml#authorization"/>. With authorization enabled, the checking of the user + ID and group is done by Impala, and unauthorized access is blocked by Impala itself. The actual low-level + read and write requests are still done by the <codeph>impala</codeph> user, so you must have appropriate + file and directory permissions for that user ID. + </p> + </li> + + <li> + <p> + You must also set up Kerberos authentication, as described in <xref href="impala_kerberos.xml#kerberos"/>, + so that users can only connect from trusted hosts. With Kerberos enabled, if someone connects a new host to + the network and creates user IDs that match your privileged IDs, they will be blocked from connecting to + Impala at all from that host. 
+ </p> + </li> + </ul> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_security_guidelines.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_security_guidelines.xml b/docs/topics/impala_security_guidelines.xml new file mode 100644 index 0000000..e7713ff --- /dev/null +++ b/docs/topics/impala_security_guidelines.xml @@ -0,0 +1,108 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="1.1" id="security_guidelines"> + + <title>Security Guidelines for Impala</title> + <prolog> + <metadata> + <data name="Category" value="Security"/> + <data name="Category" value="Impala"/> + <data name="Category" value="Planning"/> + <data name="Category" value="Guidelines"/> + <data name="Category" value="Best Practices"/> + <data name="Category" value="Administrators"/> + </metadata> + </prolog> + + <conbody> + + <p> + The following are the major steps to harden a cluster running Impala against accidents and mistakes, or + malicious attackers trying to access sensitive data: + </p> + + <ul> + <li> + <p> + Secure the <codeph>root</codeph> account. The <codeph>root</codeph> user can tamper with the + <cmdname>impalad</cmdname> daemon, read and write the data files in HDFS, log into other user accounts, and + access other system services that are beyond the control of Impala. + </p> + </li> + + <li> + <p> + Restrict membership in the <codeph>sudoers</codeph> list (in the <filepath>/etc/sudoers</filepath> file). + The users who can run the <codeph>sudo</codeph> command can do many of the same things as the + <codeph>root</codeph> user. + </p> + </li> + + <li> + <p> + Ensure the Hadoop ownership and permissions for Impala data files are restricted. + </p> + </li> + + <li> + <p> + Ensure the Hadoop ownership and permissions for Impala log files are restricted. 
+ </p> + </li> + + <li> + <p> + Ensure that the Impala web UI (available by default on port 25000 on each Impala node) is + password-protected. See <xref href="impala_webui.xml#webui"/> for details. + </p> + </li> + + <li> + <p> + Create a policy file that specifies which Impala privileges are available to users in particular Hadoop + groups (which by default map to Linux OS groups). Create the associated Linux groups using the + <cmdname>groupadd</cmdname> command if necessary. + </p> + </li> + + <li> + <p> + The Impala authorization feature makes use of the HDFS file ownership and permissions mechanism; for + background information, see the + <xref href="https://archive.cloudera.com/cdh/3/hadoop/hdfs_permissions_guide.html" scope="external" format="html">CDH + HDFS Permissions Guide</xref>. Set up users and assign them to groups at the OS level, corresponding to the + different categories of users with different access levels for various databases, tables, and HDFS + locations (URIs). Create the associated Linux users using the <cmdname>useradd</cmdname> command if + necessary, and add them to the appropriate groups with the <cmdname>usermod</cmdname> command. + </p> + </li> + + <li> + <p> + Design your databases, tables, and views so that your authorization policy rules can be simple and + consistent. For example, if all tables related to an application are inside a single + database, you can assign privileges for that database and use the <codeph>*</codeph> wildcard for the table + name. If you are creating views with different privileges than the underlying base tables, you might put + the views in a separate database so that you can use the <codeph>*</codeph> wildcard for the database + containing the base tables, while specifying the precise names of the individual views.
(For specifying + table or database names, you either specify the exact name or <codeph>*</codeph> to mean all the databases + on a server, or all the tables and views in a database.) + </p> + </li> + + <li> + <p> + Enable authorization by running the <codeph>impalad</codeph> daemons with the <codeph>-server_name</codeph> + and <codeph>-authorization_policy_file</codeph> options on all nodes. (The authorization feature does not + apply to the <cmdname>statestored</cmdname> daemon, which has no access to schema objects or data files.) + </p> + </li> + + <li> + <p> + Set up authentication using Kerberos, to make sure users really are who they say they are. + </p> + </li> + </ul> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_security_install.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_security_install.xml b/docs/topics/impala_security_install.xml new file mode 100644 index 0000000..56d34bc --- /dev/null +++ b/docs/topics/impala_security_install.xml @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="security_install"> + + <title>Installation Considerations for Impala Security</title> + <prolog> + <metadata> + <data name="Category" value="Security"/> + <data name="Category" value="Impala"/> + <data name="Category" value="Administrators"/> + </metadata> + </prolog> + + <conbody> + + <p> + Impala 1.1 comes set up with all the software and settings needed to enable security when you run the + <cmdname>impalad</cmdname> daemon with the new security-related options (<codeph>-server_name</codeph> and + <codeph>-authorization_policy_file</codeph>). You do not need to change any environment variables or install + any additional JAR files. In a cluster managed by Cloudera Manager, you do not need to change any settings in + Cloudera Manager. 
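Putting the authorization bullet above into a startup line, a hedged sketch of the daemon invocation (only the two flag names come from this text; the server name and policy file path are illustrative assumptions, not defaults):

```
impalad -server_name=server1 \
        -authorization_policy_file=/user/hive/warehouse/auth-policy.ini
```

As noted above, supply the same options to the <cmdname>impalad</cmdname> daemons on all nodes so that privileges are enforced uniformly; the <cmdname>statestored</cmdname> daemon does not take these options.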
+ </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_security_metastore.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_security_metastore.xml b/docs/topics/impala_security_metastore.xml new file mode 100644 index 0000000..246333f --- /dev/null +++ b/docs/topics/impala_security_metastore.xml @@ -0,0 +1,40 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="security_metastore"> + + <title>Securing the Hive Metastore Database</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Hive"/> + <data name="Category" value="Security"/> + <data name="Category" value="Metastore"/> + <data name="Category" value="Databases"/> + <data name="Category" value="Administrators"/> + </metadata> + </prolog> + + <conbody> + +<!-- Some of this copied from earlier. Split out both instances into conrefs. --> + + <p> + It is important to secure the Hive metastore, so that users cannot access the names or other information + about databases and tables through the Hive client or by querying the metastore database. Do this by + turning on Hive metastore security, using the instructions in the + <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_sg_hive_security.html" scope="external" format="html">CDH 5 Security Guide</xref> + for securing different Hive components: + </p> + + <ul> + <li> + Secure the Hive Metastore. + </li> + + <li> + In addition, allow access to the metastore only from the HiveServer2 server, and then disable local access + to the HiveServer2 server.
+ </li> + </ul> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_security_webui.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_security_webui.xml b/docs/topics/impala_security_webui.xml new file mode 100644 index 0000000..7ebd2ef --- /dev/null +++ b/docs/topics/impala_security_webui.xml @@ -0,0 +1,66 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="1.1" id="security_webui"> + + <title>Securing the Impala Web User Interface</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Troubleshooting"/> + <data name="Category" value="Security"/> + <data name="Category" value="Administrators"/> + </metadata> + </prolog> + + <conbody> + + <p> + The instructions in this section presume you are familiar with the + <xref href="http://en.wikipedia.org/wiki/.htpasswd" scope="external" format="html"> + <filepath>.htpasswd</filepath> mechanism</xref> commonly used to password-protect pages on web servers. + </p> + + <p> + Password-protect the Impala web UI that listens on port 25000 by default. Set up a + <filepath>.htpasswd</filepath> file in the <codeph>$IMPALA_HOME</codeph> directory, or start both the + <cmdname>impalad</cmdname> and <cmdname>statestored</cmdname> daemons with the + <codeph>--webserver_password_file</codeph> option to specify a different location (including the filename). + </p> + + <p> + This file should only be readable by the Impala process and machine administrators, because it contains + (hashed) versions of passwords. The username / password pairs are not derived from Unix usernames, Kerberos + users, or any other system. The <codeph>domain</codeph> field in the password file must match the domain + supplied to Impala by the new command-line option <codeph>--webserver_authentication_domain</codeph>. 
The + default is <codeph>mydomain.com</codeph>. +<!-- Password generator cited by Henry: <xref href="http://www.askapache.com/online-tools/htpasswd-generator/" scope="external" format="html"/> --> + </p> + + <p> + Impala also supports using HTTPS for secure web traffic. To do so, set + <codeph>--webserver_certificate_file</codeph> to refer to a valid <codeph>.pem</codeph> TLS/SSL certificate file. + Impala will automatically start using HTTPS once the TLS/SSL certificate has been read and validated. A + <codeph>.pem</codeph> file is basically a private key, followed by a signed TLS/SSL certificate; make sure to + concatenate both parts when constructing the <codeph>.pem</codeph> file. +<!-- Certificate info cited by Henry: <xref href="http://www.akadia.com/services/ssh_test_certificate.html" scope="external" format="html"/> +This page was very useful for creating a certificate and private key file; +the last step which was missing was to append one file to the other to make the <codeph>.pem</codeph> file. --> + </p> + + <p> + If Impala cannot find or parse the <codeph>.pem</codeph> file, it prints an error message and quits. + </p> + + <note> + <p> + If the private key is encrypted using a passphrase, Impala will ask for that passphrase on startup, which + is not useful for a large cluster. In that case, remove the passphrase and make the <codeph>.pem</codeph> + file readable only by Impala and administrators. + </p> + <p> + When you turn on TLS/SSL for the Impala web UI, the associated URLs change from <codeph>http://</codeph> + prefixes to <codeph>https://</codeph>. Adjust any bookmarks or application code that refers to those URLs. 
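[Editor's sketch: the text above says a `.pem` file is a private key followed by the signed certificate, concatenated. A minimal illustration with `openssl`, assuming a self-signed certificate is acceptable and using made-up file names and CN.]

```shell
# Generate an unencrypted private key and a self-signed certificate
# (file names and the CN value are arbitrary examples).
openssl req -x509 -newkey rsa:2048 -nodes \
    -keyout impala_key.pem -out impala_cert.pem \
    -days 365 -subj "/CN=impala.example.com"

# As described above: private key first, then the signed certificate.
cat impala_key.pem impala_cert.pem > impala_webserver.pem
```

The combined file would then be the value for `--webserver_certificate_file`; per the note above, the key should carry no passphrase and the file should be readable only by Impala and administrators.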
+ </p> + </note> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_select.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_select.xml b/docs/topics/impala_select.xml new file mode 100644 index 0000000..321c371 --- /dev/null +++ b/docs/topics/impala_select.xml @@ -0,0 +1,206 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="select"> + + <title>SELECT Statement</title> + <titlealts audience="PDF"><navtitle>SELECT</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Reports"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <!-- This is such an important statement, think if there are more applicable categories. --> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">SELECT statement</indexterm> + The <codeph>SELECT</codeph> statement performs queries, retrieving data from one or more tables and producing + result sets consisting of rows and columns. + </p> + + <p> + The Impala <codeph><xref href="impala_insert.xml#insert">INSERT</xref></codeph> statement also typically ends + with a <codeph>SELECT</codeph> statement, to define data to copy from one table to another. + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>[WITH <i>name</i> AS (<i>select_expression</i>) [, ...] ] +SELECT + [ALL | DISTINCT] + [STRAIGHT_JOIN] + <i>expression</i> [, <i>expression</i> ...] +FROM <i>table_reference</i> [, <i>table_reference</i> ...] 
+[[FULL | [LEFT | RIGHT] INNER | [LEFT | RIGHT] OUTER | [LEFT | RIGHT] SEMI | [LEFT | RIGHT] ANTI | CROSS] + JOIN <i>table_reference</i> + [ON <i>join_equality_clauses</i> | USING (<varname>col1</varname>[, <varname>col2</varname> ...])] ... +WHERE <i>conditions</i> +GROUP BY { <i>column</i> | <i>expression</i> [, ...] } +HAVING <i>conditions</i> +ORDER BY { <i>column</i> | <i>expression</i> [ASC | DESC] [NULLS FIRST | NULLS LAST] [, ...] } +LIMIT <i>expression</i> [OFFSET <i>expression</i>] +[UNION [ALL] <i>select_statement</i>] ...] +</codeblock> + + <p> + Impala <codeph>SELECT</codeph> queries support: + </p> + + <ul> + <li> + SQL scalar data types: <codeph><xref href="impala_boolean.xml#boolean">BOOLEAN</xref></codeph>, + <codeph><xref href="impala_tinyint.xml#tinyint">TINYINT</xref></codeph>, + <codeph><xref href="impala_smallint.xml#smallint">SMALLINT</xref></codeph>, + <codeph><xref href="impala_int.xml#int">INT</xref></codeph>, + <codeph><xref href="impala_bigint.xml#bigint">BIGINT</xref></codeph>, + <codeph><xref href="impala_decimal.xml#decimal">DECIMAL</xref></codeph>, + <codeph><xref href="impala_float.xml#float">FLOAT</xref></codeph>, + <codeph><xref href="impala_double.xml#double">DOUBLE</xref></codeph>, + <codeph><xref href="impala_timestamp.xml#timestamp">TIMESTAMP</xref></codeph>, + <codeph><xref href="impala_string.xml#string">STRING</xref></codeph>, + <codeph><xref href="impala_varchar.xml#varchar">VARCHAR</xref></codeph>, + <codeph><xref href="impala_char.xml#char">CHAR</xref></codeph>. + </li> + +<!-- To do: Consider promoting 'querying complex types' to its own subtopic or pseudo-heading. --> + <li rev="2.3.0"> + The complex data types <codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph> + are available in <keyword keyref="impala23_full"/> and higher.
+ Queries involving these types typically involve special qualified names + using dot notation for referring to the complex column fields, + and join clauses for bringing the complex columns into the result set. + See <xref href="impala_complex_types.xml#complex_types"/> for details. + </li> + + <li rev="1.1"> + An optional <xref href="impala_with.xml#with"><codeph>WITH</codeph> clause</xref> before the + <codeph>SELECT</codeph> keyword, to define a subquery whose name or column names can be referenced from + later in the main query. This clause lets you abstract repeated clauses, such as aggregation functions, + that are referenced multiple times in the same query. + </li> + + <li> + By default, one <codeph>DISTINCT</codeph> clause per query. See <xref href="impala_distinct.xml#distinct"/> + for details. See <xref href="impala_appx_count_distinct.xml#appx_count_distinct"/> for a query option to + allow multiple <codeph>COUNT(DISTINCT)</codeph> impressions in the same query. + </li> + + <li> + Subqueries in a <codeph>FROM</codeph> clause. In <keyword keyref="impala20_full"/> and higher, + subqueries can also go in the <codeph>WHERE</codeph> clause, for example with the + <codeph>IN()</codeph>, <codeph>EXISTS</codeph>, and <codeph>NOT EXISTS</codeph> operators. + </li> + + <li> + <codeph>WHERE</codeph>, <codeph>GROUP BY</codeph>, <codeph>HAVING</codeph> clauses. + </li> + + <li rev="obwl"> + <codeph><xref href="impala_order_by.xml#order_by">ORDER BY</xref></codeph>. Prior to Impala 1.4.0, Impala + required that queries using an <codeph>ORDER BY</codeph> clause also include a + <codeph><xref href="impala_limit.xml#limit">LIMIT</xref></codeph> clause. In Impala 1.4.0 and higher, this + restriction is lifted; sort operations that would exceed the Impala memory limit automatically use a + temporary disk work area to perform the sort. 
+ </li> + + <li> + <p conref="../shared/impala_common.xml#common/join_types"/> + <p> + See <xref href="impala_joins.xml#joins"/> for details and examples of join queries. + </p> + </li> + + <li> + <codeph>UNION ALL</codeph>. + </li> + + <li> + <codeph>LIMIT</codeph>. + </li> + + <li> + External tables. + </li> + + <li> + Relational operators such as greater than, less than, or equal to. + </li> + + <li> + Arithmetic operators such as addition or subtraction. + </li> + + <li> + Logical/Boolean operators <codeph>AND</codeph>, <codeph>OR</codeph>, and <codeph>NOT</codeph>. Impala does + not support the corresponding symbols <codeph>&&</codeph>, <codeph>||</codeph>, and + <codeph>!</codeph>. + </li> + + <li> + Common SQL built-in functions such as <codeph>COUNT</codeph>, <codeph>SUM</codeph>, <codeph>CAST</codeph>, + <codeph>LIKE</codeph>, <codeph>IN</codeph>, <codeph>BETWEEN</codeph>, and <codeph>COALESCE</codeph>. Impala + specifically supports built-ins described in <xref href="impala_functions.xml#builtins"/>. + </li> + </ul> + + <p conref="../shared/impala_common.xml#common/ignore_file_extensions"/> + + <p conref="../shared/impala_common.xml#common/security_blurb"/> + <p conref="../shared/impala_common.xml#common/redaction_yes"/> + + <p conref="../shared/impala_common.xml#common/s3_blurb"/> + <p conref="../shared/impala_common.xml#common/s3_block_splitting"/> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_yes"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb"/> + <p rev="CDH-19187"> + The user ID that the <cmdname>impalad</cmdname> daemon runs under, + typically the <codeph>impala</codeph> user, must have read + permissions for the files in all applicable directories in all source tables, + and read and execute permissions for the relevant data directories. + (A <codeph>SELECT</codeph> operation could read files from multiple different HDFS directories + if the source table is partitioned.) 
+ If a query attempts to read a data file and is unable to because of an HDFS permission error, + the query halts and does not return any further results. + </p> + + <p outputclass="toc"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + The <codeph>SELECT</codeph> syntax is so extensive that it forms its own category of statements: queries. The + other major classifications of SQL statements are data definition language (see + <xref href="impala_ddl.xml#ddl"/>) and data manipulation language (see <xref href="impala_dml.xml#dml"/>). + </p> + + <p> + Because the focus of Impala is on fast queries with interactive response times over huge data sets, query + performance and scalability are important considerations. See + <xref href="impala_performance.xml#performance"/> and <xref href="impala_scalability.xml#scalability"/> for + details. + </p> + </conbody> + + <concept id="where" audience="Cloudera"> + +<!-- WHERE hidden for the moment until there's the chance to add some reasonably comprehensive content + + and make it its own file. --> + + <title>WHERE Clause</title> + + <conbody> + + <p/> + </conbody> + </concept> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_seq_compression_mode.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_seq_compression_mode.xml b/docs/topics/impala_seq_compression_mode.xml new file mode 100644 index 0000000..cb7cb05 --- /dev/null +++ b/docs/topics/impala_seq_compression_mode.xml @@ -0,0 +1,29 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="seq_compression_mode" rev="2.5.0"> + +<!-- This option is for internal use only and might go away without ever being documented. It's hidden in the DITA map. 
--> + + <title>SEQ_COMPRESSION_MODE Query Option</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + </metadata> + </prolog> + + <conbody> + + <p rev="2.5.0"> + <indexterm audience="Cloudera">SEQ_COMPRESSION_MODE query option</indexterm> + </p> + + <p> + <b>Type:</b> + </p> + + <p> + <b>Default:</b> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_seqfile.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_seqfile.xml b/docs/topics/impala_seqfile.xml new file mode 100644 index 0000000..860007e --- /dev/null +++ b/docs/topics/impala_seqfile.xml @@ -0,0 +1,239 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="seqfile"> + + <title id="sequencefile">Using the SequenceFile File Format with Impala Tables</title> + <titlealts audience="PDF"><navtitle>SequenceFile Data Files</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <!-- <data name="Category" value="SequenceFile"/> --> + <data name="Category" value="File Formats"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">SequenceFile support in Impala</indexterm> + Impala supports using SequenceFile data files. + </p> + + <table> + <title>SequenceFile Format Support in Impala</title> + <tgroup cols="5"> + <colspec colname="1" colwidth="10*"/> + <colspec colname="2" colwidth="10*"/> + <colspec colname="3" colwidth="20*"/> + <colspec colname="4" colwidth="30*"/> + <colspec colname="5" colwidth="30*"/> + <thead> + <row> + <entry> + File Type + </entry> + <entry> + Format + </entry> + <entry> + Compression Codecs + </entry> + <entry> + Impala Can CREATE?
+ </entry> + <entry> + Impala Can INSERT? + </entry> + </row> + </thead> + <tbody> + <row conref="impala_file_formats.xml#file_formats/sequencefile_support"> + <entry/> + </row> + </tbody> + </tgroup> + </table> + + <p outputclass="toc inpage"/> + </conbody> + + <concept id="seqfile_create"> + + <title>Creating SequenceFile Tables and Loading Data</title> + <prolog> + <metadata> + <data name="Category" value="ETL"/> + </metadata> + </prolog> + + <conbody> + + <p> + If you do not have an existing data file to use, begin by creating one in the appropriate format. + </p> + + <p> + <b>To create a SequenceFile table:</b> + </p> + + <p> + In the <codeph>impala-shell</codeph> interpreter, issue a command similar to: + </p> + +<codeblock>create table sequencefile_table (<varname>column_specs</varname>) stored as sequencefile;</codeblock> + + <p> + Because Impala can query some kinds of tables that it cannot currently write to, after creating tables of + certain file formats, you might use the Hive shell to load the data. See + <xref href="impala_file_formats.xml#file_formats"/> for details. After loading data into a table through + Hive or other mechanism outside of Impala, issue a <codeph>REFRESH <varname>table_name</varname></codeph> + statement the next time you connect to the Impala node, before querying the table, to make Impala recognize + the new data. 
+ </p> + + <p> + For example, here is how you might create some SequenceFile tables in Impala (by specifying the columns + explicitly, or cloning the structure of another table), load data through Hive, and query them through + Impala: + </p> + +<codeblock>$ impala-shell -i localhost +[localhost:21000] > create table seqfile_table (x int) stored as sequencefile; +[localhost:21000] > create table seqfile_clone like some_other_table stored as sequencefile; +[localhost:21000] > quit; + +$ hive +hive> insert into table seqfile_table select x from some_other_table; +3 Rows loaded to seqfile_table +Time taken: 19.047 seconds +hive> quit; + +$ impala-shell -i localhost +[localhost:21000] > select * from seqfile_table; +Returned 0 row(s) in 0.23s +[localhost:21000] > -- Make Impala recognize the data loaded through Hive; +[localhost:21000] > refresh seqfile_table; +[localhost:21000] > select * from seqfile_table; ++---+ +| x | ++---+ +| 1 | +| 2 | +| 3 | ++---+ +Returned 3 row(s) in 0.23s</codeblock> + + <p conref="../shared/impala_common.xml#common/complex_types_unsupported_filetype"/> + + </conbody> + </concept> + + <concept id="seqfile_compression"> + + <title>Enabling Compression for SequenceFile Tables</title> + <prolog> + <metadata> + <data name="Category" value="Compression"/> + <data name="Category" value="Snappy"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">compression</indexterm> + You may want to enable compression on existing tables. Enabling compression provides performance gains in + most cases and is supported for SequenceFile tables. 
For example, to enable Snappy compression, you would + specify the following additional settings when loading data through the Hive shell: + </p> + +<codeblock>hive> SET hive.exec.compress.output=true; +hive> SET mapred.max.split.size=256000000; +hive> SET mapred.output.compression.type=BLOCK; +hive> SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec; +hive> insert overwrite table <varname>new_table</varname> select * from <varname>old_table</varname>;</codeblock> + + <p> + If you are converting partitioned tables, you must complete additional steps. In such a case, specify + additional settings similar to the following: + </p> + +<codeblock>hive> create table <varname>new_table</varname> (<varname>your_cols</varname>) partitioned by (<varname>partition_cols</varname>) stored as <varname>new_format</varname>; +hive> SET hive.exec.dynamic.partition.mode=nonstrict; +hive> SET hive.exec.dynamic.partition=true; +hive> insert overwrite table <varname>new_table</varname> partition(<varname>comma_separated_partition_cols</varname>) select * from <varname>old_table</varname>;</codeblock> + + <p> + Remember that you do not need to tell Hive the file format of the source table; Hive determines that + automatically. Consider the case of + converting a table with two partition columns called <codeph>year</codeph> and <codeph>month</codeph> to a + Snappy-compressed SequenceFile.
Combining the components outlined previously to complete this table + conversion, you would specify settings similar to the following: + </p> + +<codeblock>hive> create table TBL_SEQ (int_col int, string_col string) STORED AS SEQUENCEFILE; +hive> SET hive.exec.compress.output=true; +hive> SET mapred.max.split.size=256000000; +hive> SET mapred.output.compression.type=BLOCK; +hive> SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec; +hive> SET hive.exec.dynamic.partition.mode=nonstrict; +hive> SET hive.exec.dynamic.partition=true; +hive> INSERT OVERWRITE TABLE tbl_seq SELECT * FROM tbl;</codeblock> + + <p> + To complete a similar process for a table that includes partitions, you would specify settings similar to + the following: + </p> + +<codeblock>hive> CREATE TABLE tbl_seq (int_col INT, string_col STRING) PARTITIONED BY (year INT) STORED AS SEQUENCEFILE; +hive> SET hive.exec.compress.output=true; +hive> SET mapred.max.split.size=256000000; +hive> SET mapred.output.compression.type=BLOCK; +hive> SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec; +hive> SET hive.exec.dynamic.partition.mode=nonstrict; +hive> SET hive.exec.dynamic.partition=true; +hive> INSERT OVERWRITE TABLE tbl_seq PARTITION(year) SELECT * FROM tbl;</codeblock> + + <note> + <p> + The compression type is specified in the following command: + </p> +<codeblock>SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;</codeblock> + <p> + You could elect to specify alternative codecs such as <codeph>GzipCodec</codeph> here. 
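[Editor's sketch: switching the Snappy examples above to Gzip changes only the codec setting; the other `SET` commands stay the same.]

```
hive> SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
```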
+ </p> + </note> + </conbody> + </concept> + + <concept audience="Cloudera" id="seqfile_data_types"> + + <title>Data Type Considerations for SequenceFile Tables</title> + + <conbody> + + <p></p> + </conbody> + </concept> + + <concept id="seqfile_performance"> + + <title>Query Performance for Impala SequenceFile Tables</title> + + <conbody> + + <p> + In general, expect query performance with SequenceFile tables to be + faster than with tables using text data, but slower than with + Parquet tables. See <xref href="impala_parquet.xml#parquet"/> + for information about using the Parquet file format for + high-performance analytic queries. + </p> + + <p conref="../shared/impala_common.xml#common/s3_block_splitting"/> + + </conbody> + </concept> + +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_set.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_set.xml b/docs/topics/impala_set.xml new file mode 100644 index 0000000..f477269 --- /dev/null +++ b/docs/topics/impala_set.xml @@ -0,0 +1,192 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="2.0.0" id="set"> + + <title>SET Statement</title> + <titlealts audience="PDF"><navtitle>SET</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Configuring"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p rev="2.0.0"> + <indexterm audience="Cloudera">SET statement</indexterm> + Specifies values for query options that control the runtime behavior of other statements within the same + session. 
+ </p> + + <p rev="2.5.0 IMPALA-2180"> + In <keyword keyref="impala25_full"/> and higher, <codeph>SET</codeph> also defines user-specified substitution variables for + the <cmdname>impala-shell</cmdname> interpreter. This feature uses the <codeph>SET</codeph> command + built into <cmdname>impala-shell</cmdname> instead of the SQL <codeph>SET</codeph> statement. + Therefore the substitution mechanism only works with queries processed by <cmdname>impala-shell</cmdname>, + not with queries submitted through JDBC or ODBC. + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>SET [<varname>query_option</varname>=<varname>option_value</varname>] +</codeblock> + + <p> + <codeph>SET</codeph> with no arguments returns a result set consisting of all available query options and + their current values. + </p> + + <p> + The query option name and any string argument values are case-insensitive. + </p> + + <p> + Each query option has a specific allowed notation for its arguments. Boolean options can be enabled and + disabled by assigning values of either <codeph>true</codeph> and <codeph>false</codeph>, or + <codeph>1</codeph> and <codeph>0</codeph>. Some numeric options accept a final character signifying the unit, + such as <codeph>2g</codeph> for 2 gigabytes or <codeph>100m</codeph> for 100 megabytes. See + <xref href="impala_query_options.xml#query_options"/> for the details of each query option. + </p> + + <p> + <b>User-specified substitution variables:</b> + </p> + + <p rev="2.5.0 IMPALA-2180"> + In <keyword keyref="impala25_full"/> and higher, you can specify your own names and string substitution values + within the <cmdname>impala-shell</cmdname> interpreter. Once a substitution variable is set up, + its value is inserted into any SQL statement in that same <cmdname>impala-shell</cmdname> session + that contains the notation <codeph>${var:<varname>varname</varname>}</codeph>. 
+ Using <codeph>SET</codeph> in an interactive <cmdname>impala-shell</cmdname> session overrides + any value for that same variable passed in through the <codeph>--var=<varname>varname</varname>=<varname>value</varname></codeph> + command-line option. + </p> + + <p rev="2.5.0 IMPALA-2180"> + For example, to set up some default parameters for report queries, but then override those default + within an <cmdname>impala-shell</cmdname> session, you might issue commands and statements such as + the following: + </p> + +<codeblock rev="2.5.0 IMPALA-2180"> +-- Initial setup for this example. +create table staging_table (s string); +insert into staging_table values ('foo'), ('bar'), ('bletch'); + +create table production_table (s string); +insert into production_table values ('North America'), ('EMEA'), ('Asia'); +quit; + +-- Start impala-shell with user-specified substitution variables, +-- run a query, then override the variables with SET and run the query again. +$ impala-shell --var=table_name=staging_table --var=cutoff=2 +... <varname>banner message</varname> ... 
+[localhost:21000] > select s from ${var:table_name} order by s limit ${var:cutoff}; +Query: select s from staging_table order by s limit 2 ++--------+ +| s | ++--------+ +| bar | +| bletch | ++--------+ +Fetched 2 row(s) in 1.06s + +[localhost:21000] > set var:table_name=production_table; +Variable TABLE_NAME set to production_table +[localhost:21000] > set var:cutoff=3; +Variable CUTOFF set to 3 + +[localhost:21000] > select s from ${var:table_name} order by s limit ${var:cutoff}; +Query: select s from production_table order by s limit 3 ++---------------+ +| s | ++---------------+ +| Asia | +| EMEA | +| North America | ++---------------+ +</codeblock> + + <p rev="2.5.0 IMPALA-2180"> + The following example shows how <codeph>SET</codeph> with no parameters displays + all user-specified substitution variables, and how <codeph>UNSET</codeph> removes + the substitution variable entirely: + </p> + +<codeblock rev="2.5.0 IMPALA-2180"> +[localhost:21000] > set; +Query options (defaults shown in []): + ABORT_ON_DEFAULT_LIMIT_EXCEEDED: [0] + ... + V_CPU_CORES: [0] + +Shell Options + LIVE_PROGRESS: False + LIVE_SUMMARY: False + +Variables: + CUTOFF: 3 + TABLE_NAME: staging_table + +[localhost:21000] > unset var:cutoff; +Unsetting variable CUTOFF +[localhost:21000] > select s from ${var:table_name} order by s limit ${var:cutoff}; +Error: Unknown variable CUTOFF +</codeblock> + + <p rev="2.5.0 IMPALA-2180"> + See <xref href="impala_shell_running_commands.xml"/> for more examples of using the + <codeph>--var</codeph>, <codeph>SET</codeph>, and <codeph>${var:<varname>varname</varname>}</codeph> + substitution technique in <cmdname>impala-shell</cmdname>. + </p> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + <codeph>MEM_LIMIT</codeph> is probably the most commonly used query option. You can specify a high value to + allow a resource-intensive query to complete. 
For testing how queries would work on memory-constrained + systems, you might specify an artificially low value. + </p> + + <p conref="../shared/impala_common.xml#common/complex_types_blurb"/> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p> + The following example sets some numeric and some Boolean query options to control usage of memory, disk + space, and timeout periods, then runs a query whose success could depend on the options in effect: + </p> + +<codeblock>set mem_limit=64g; +set DISABLE_UNSAFE_SPILLS=true; +set parquet_file_size=400m; +set RESERVATION_REQUEST_TIMEOUT=900000; +insert overwrite parquet_table select c1, c2, count(c3) from text_table group by c1, c2, c3; +</codeblock> + + <p conref="../shared/impala_common.xml#common/added_in_20"/> + + <p> + <codeph>SET</codeph> has always been available as an <cmdname>impala-shell</cmdname> command. Promoting it to + a SQL statement lets you use this feature in client applications through the JDBC and ODBC APIs. + </p> + +<!-- <p conref="../shared/impala_common.xml#common/jdbc_blurb"/> --> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + See <xref href="impala_query_options.xml#query_options"/> for the query options you can adjust using this + statement. 
+ </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_shell_commands.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_shell_commands.xml b/docs/topics/impala_shell_commands.xml new file mode 100644 index 0000000..885fd16 --- /dev/null +++ b/docs/topics/impala_shell_commands.xml @@ -0,0 +1,399 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="shell_commands"> + + <title>impala-shell Command Reference</title> + <titlealts audience="PDF"><navtitle>Command Reference</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="impala-shell"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">impala-shell</indexterm> + Use the following commands within <codeph>impala-shell</codeph> to pass requests to the + <codeph>impalad</codeph> daemon that the shell is connected to. You can enter a command interactively at the + prompt, or pass it as the argument to the <codeph>-q</codeph> option of <codeph>impala-shell</codeph>. Most + of these commands are passed to the Impala daemon as SQL statements; refer to the corresponding + <xref href="impala_langref_sql.xml#langref_sql">SQL language reference sections</xref> for full syntax + details. 
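[Editor's sketch: a quick illustration of the interactive and `-q` forms mentioned above; the host name and statement are hypothetical.]

```shell
# Run one command non-interactively; -i picks the impalad host to connect to.
impala-shell -i impala-host:21000 -q 'show tables'

# Equivalent interactive form, typed at the shell prompt:
#   [impala-host:21000] > show tables;
```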
+ </p> + + <table> + <tgroup cols="2"> + <colspec colname="1" colwidth="10*"/> + <colspec colname="2" colwidth="40*"/> + <thead> + <row> + <entry> + Command + </entry> + <entry> + Explanation + </entry> + </row> + </thead> + <tbody> + <row id="alter_cmd"> + <entry> + <p> + <codeph>alter</codeph> + </p> + </entry> + <entry> + <p> + Changes the underlying structure or settings of an Impala table, or a table shared between Impala + and Hive. See <xref href="impala_alter_table.xml#alter_table"/> and + <xref href="impala_alter_view.xml#alter_view"/> for details. + </p> + </entry> + </row> + <row rev="1.2.2" id="compute_cmd"> + <entry> + <p> + <codeph>compute stats</codeph> + </p> + </entry> + <entry> + <p> + Gathers important performance-related information for a table, used by Impala to optimize queries. + See <xref href="impala_compute_stats.xml#compute_stats"/> for details. + </p> + </entry> + </row> + <row id="connect_cmd"> + <entry> + <p> + <codeph>connect</codeph> + </p> + </entry> + <entry> + <p> + Connects to the specified instance of <codeph>impalad</codeph>. The default port of 21000 is + assumed unless you provide another value. You can connect to any host in your cluster that is + running <codeph>impalad</codeph>. If you connect to an instance of <codeph>impalad</codeph> that + was started with an alternate port specified by the <codeph>--fe_port</codeph> flag, you must + provide that alternate port. See <xref href="impala_connecting.xml#connecting"/> for examples. + </p> + + <p conref="../shared/impala_common.xml#common/set_vs_connect"/> + </entry> + </row> + <row id="describe_cmd"> + <entry> + <p> + <codeph>describe</codeph> + </p> + </entry> + <entry> + <p> + Shows the columns, column data types, and any column comments for a specified table. + <codeph>DESCRIBE FORMATTED</codeph> shows additional information such as the HDFS data directory, + partitions, and internal properties for the table. 
See <xref href="impala_describe.xml#describe"/> + for details about the basic <codeph>DESCRIBE</codeph> output and the <codeph>DESCRIBE + FORMATTED</codeph> variant. You can use <codeph>DESC</codeph> as shorthand for the + <codeph>DESCRIBE</codeph> command. + </p> + </entry> + </row> + <row id="drop_cmd"> + <entry> + <p> + <codeph>drop</codeph> + </p> + </entry> + <entry> + <p> + Removes a schema object, and in some cases its associated data files. See + <xref href="impala_drop_table.xml#drop_table"/>, <xref href="impala_drop_view.xml#drop_view"/>, + <xref href="impala_drop_database.xml#drop_database"/>, and + <xref href="impala_drop_function.xml#drop_function"/> for details. + </p> + </entry> + </row> + <row id="explain_cmd"> + <entry> + <p> + <codeph>explain</codeph> + </p> + </entry> + <entry> + <p> + Provides the execution plan for a query. <codeph>EXPLAIN</codeph> represents a query as a series of + steps. For example, these steps might be map/reduce stages, metastore operations, or file system + operations such as move or rename. See <xref href="impala_explain.xml#explain"/> and + <xref href="impala_explain_plan.xml#perf_explain"/> for details. + </p> + </entry> + </row> + <row id="help_cmd"> + <entry> + <p> + <codeph>help</codeph> + </p> + </entry> + <entry> + <p> + Displays a list of all available commands and options. + </p> + </entry> + </row> + <row id="history_cmd"> + <entry> + <p> + <codeph>history</codeph> + </p> + </entry> + <entry> + <p> + Maintains an enumerated cross-session command history. This history is stored in the + <filepath>~/.impalahistory</filepath> file. + </p> + </entry> + </row> + <row id="insert_cmd"> + <entry> + <p> + <codeph>insert</codeph> + </p> + </entry> + <entry> + <p> + Writes the results of a query to a specified table. This either overwrites table data or appends + data to the existing table content. See <xref href="impala_insert.xml#insert"/> for details.
+ </p> + </entry> + </row> + <row id="invalidate_metadata_cmd"> + <entry> + <p> + <codeph>invalidate metadata</codeph> + </p> + </entry> + <entry> + <p> + Updates <cmdname>impalad</cmdname> metadata for table existence and structure. Use this command + after creating, dropping, or altering databases, tables, or partitions in Hive. See + <xref href="impala_invalidate_metadata.xml#invalidate_metadata"/> for details. + </p> + </entry> + </row> + <row id="profile_cmd"> + <entry> + <p> + <codeph>profile</codeph> + </p> + </entry> + <entry> + <p> + Displays low-level information about the most recent query. Used for performance diagnosis and + tuning. <ph rev="1.4.0"> The report starts with the same information as produced by the + <codeph>EXPLAIN</codeph> statement and the <codeph>SUMMARY</codeph> command.</ph> See + <xref href="impala_explain_plan.xml#perf_profile"/> for details. + </p> + </entry> + </row> + <row id="quit_cmd"> + <entry> + <p> + <codeph>quit</codeph> + </p> + </entry> + <entry> + <p> + Exits the shell. Remember to include the final semicolon so that the shell recognizes the end of + the command. + </p> + </entry> + </row> + <row id="refresh_cmd"> + <entry> + <p> + <codeph>refresh</codeph> + </p> + </entry> + <entry> + <p> + Refreshes <cmdname>impalad</cmdname> metadata for the locations of HDFS blocks corresponding to + Impala data files. Use this command after loading new data files into an Impala table through Hive + or through HDFS commands. See <xref href="impala_refresh.xml#refresh"/> for details. + </p> + </entry> + </row> + <row id="select_cmd"> + <entry> + <p> + <codeph>select</codeph> + </p> + </entry> + <entry> + <p> + Specifies the data set on which to perform an action. The information returned by + <codeph>select</codeph> can be sent to an output such as the console or a file, or can be used + as part of another query. See <xref href="impala_select.xml#select"/> for details.
+ </p> + </entry> + </row> + <row id="set_cmd"> + <entry> + <p> + <codeph>set</codeph> + </p> + </entry> + <entry> + <p> + Manages query options for an <cmdname>impala-shell</cmdname> session. The available options are the + ones listed in <xref href="impala_query_options.xml#query_options"/>. These options are used for + query tuning and troubleshooting. Issue <codeph>SET</codeph> with no arguments to see the current + query options, based on the <cmdname>impalad</cmdname> defaults, on values you specified at + <cmdname>impalad</cmdname> startup, or on earlier <codeph>SET</codeph> statements in the same + session. To modify option values, issue commands with the syntax <codeph>set + <varname>option</varname>=<varname>value</varname></codeph>. To restore an option to its default, + use the <codeph>unset</codeph> command. Some options take Boolean values of <codeph>true</codeph> + and <codeph>false</codeph>. Others take numeric arguments or quoted string values. + </p> + + <p conref="../shared/impala_common.xml#common/set_vs_connect"/> + + <p rev="2.0.0"> + In Impala 2.0 and later, <codeph>SET</codeph> is available as a SQL statement for any kind of + application, not only through <cmdname>impala-shell</cmdname>. See + <xref href="impala_set.xml#set"/> for details. + </p> + + <p rev="2.5.0 IMPALA-2180"> + In Impala 2.5 and later, you can use <codeph>SET</codeph> to define your own substitution variables + within an <cmdname>impala-shell</cmdname> session. + Within a SQL statement, you substitute the value by using the notation <codeph>${var:<varname>variable_name</varname>}</codeph>. + </p> + </entry> + </row> + <row id="shell_cmd"> + <entry> + <p> + <codeph>shell</codeph> + </p> + </entry> + <entry> + <p> + Executes the specified command in the operating system shell without exiting + <codeph>impala-shell</codeph>. You can use the <codeph>!</codeph> character as shorthand for the + <codeph>shell</codeph> command.
+ </p> + + <note> + Quote any instances of the <codeph>--</codeph> or <codeph>/*</codeph> tokens to prevent them from being + interpreted as the start of a comment. To embed comments within <codeph>source</codeph> or + <codeph>!</codeph> commands, use the shell comment character <codeph>#</codeph> before the comment + portion of the line. + </note> + </entry> + </row> + <row id="show_cmd"> + <entry> + <p> + <codeph>show</codeph> + </p> + </entry> + <entry> + <p> + Displays metastore data for schema objects created and accessed through Impala, Hive, or both. + <codeph>show</codeph> can be used to gather information about objects such as databases, tables, and functions. + See <xref href="impala_show.xml#show"/> for details. + </p> + </entry> + </row> + <row id="source_cmd" rev="IMPALA-3397 CDH-40097"> + <entry> + <p> + <codeph>source</codeph> or <codeph>src</codeph> + </p> + </entry> + <entry> + <p> + Executes one or more statements residing in a specified file on the local filesystem. + Allows you to perform the same kinds of batch operations as with the <codeph>-f</codeph> option, + but interactively within the interpreter. The file can contain SQL statements and other + <cmdname>impala-shell</cmdname> commands, including additional <codeph>SOURCE</codeph> commands + to perform a flexible sequence of actions. Each command or statement, except the last one in the file, + must end with a semicolon. + See <xref href="impala_shell_running_commands.xml#shell_running_commands"/> for examples. + </p> + </entry> + </row> + <row rev="1.4.0" id="summary_cmd"> + <entry> + <p> + <codeph>summary</codeph> + </p> + </entry> + <entry> + <p> + Summarizes the work performed in various stages of a query. It provides a higher-level view of the + information displayed by the <codeph>EXPLAIN</codeph> command. Added in Impala 1.4.0. See + <xref href="impala_explain_plan.xml#perf_summary"/> for details about the report format + and how to interpret it.
+ </p> + <p rev="2.3.0"> + In <keyword keyref="impala23_full"/> and higher, you can see a continuously updated report of + the summary information while a query is in progress. + See <xref href="impala_live_summary.xml#live_summary"/> for details. + </p> + </entry> + </row> + <row id="unset_cmd"> + <entry> + <p> + <codeph>unset</codeph> + </p> + </entry> + <entry> + <p> + Removes any user-specified value for a query option and returns the option to its default value. + See <xref href="impala_query_options.xml#query_options"/> for the available query options. + </p> + <p rev="2.5.0 IMPALA-2180"> + In <keyword keyref="impala25_full"/> and higher, you can also remove user-specified substitution variables + using the notation <codeph>UNSET VAR:<varname>variable_name</varname></codeph>. + </p> + </entry> + </row> + <row id="use_cmd"> + <entry> + <p> + <codeph>use</codeph> + </p> + </entry> + <entry> + <p> + Indicates the database against which to execute subsequent commands. Lets you avoid using fully + qualified names when referring to tables in databases other than <codeph>default</codeph>. See + <xref href="impala_use.xml#use"/> for details. Not effective with the <codeph>-q</codeph> option, + because that option only allows a single statement in the argument. + </p> + </entry> + </row> + <row id="version_cmd"> + <entry> + <p> + <codeph>version</codeph> + </p> + </entry> + <entry> + <p> + Returns Impala version information. + </p> + </entry> + </row> + </tbody> + </tgroup> + </table> + </conbody> +</concept>
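As an illustration of how several commands documented in the table above (connect, set, unset, quit) fit together, a hypothetical impala-shell session using the Impala 2.5 substitution-variable feature might look like the following sketch. The "set var:" form shown here is an assumption inferred from the documented ${var:variable_name} and UNSET VAR: notations; the host, database, and table names are illustrative only, and the transcript has not been verified against a running cluster.

```shell
# Hypothetical session: impalad assumed to be listening on localhost:21000.
$ impala-shell -i localhost:21000
[localhost:21000] > set var:db_name=tpch;
[localhost:21000] > use ${var:db_name};
[localhost:21000] > select count(*) from lineitem;
[localhost:21000] > unset var:db_name;
[localhost:21000] > quit;
```

The same sequence of statements, each ending with a semicolon, could also be placed in a file and run either through the -f option of impala-shell or interactively with the source command.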