http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala.xml b/docs/topics/impala.xml new file mode 100644 index 0000000..8dc7b2a --- /dev/null +++ b/docs/topics/impala.xml @@ -0,0 +1,77 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="about_impala"> + + <title>Apache Impala (incubating) - Interactive SQL</title> + <titlealts audience="PDF"><navtitle>Impala Guide</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Components"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="SQL"/> + </metadata> + </prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/impala_mission_statement"/> + + <p conref="../shared/impala_common.xml#common/impala_hive_compatibility"/> + + <p conref="../shared/impala_common.xml#common/impala_advantages"/> + + <p outputclass="toc"/> + + <p audience="integrated"> + <b>Related information throughout the CDH 5 library:</b> + </p> + + <p audience="integrated"> + In CDH 5, the Impala documentation for Release Notes, Installation, Upgrading, and Security has been + integrated alongside the corresponding information for other Hadoop components: + </p> + +<!-- Same list is in impala.xml and Impala FAQs. Conref in both places. 
--> + + <ul> + <li> + <xref href="impala_new_features.xml#new_features">New features</xref> + </li> + + <li> + <xref href="impala_known_issues.xml#known_issues">Known and fixed issues</xref> + </li> + + <li> + <xref href="impala_incompatible_changes.xml#incompatible_changes">Incompatible changes</xref> + </li> + + <li> + <xref href="impala_install.xml#install">Installing Impala</xref> + </li> + + <li> + <xref href="impala_upgrading.xml#upgrading">Upgrading Impala</xref> + </li> + + <li> + <xref href="impala_config.xml#config">Configuring Impala</xref> + </li> + + <li> + <xref href="impala_processes.xml#processes">Starting Impala</xref> + </li> + + <li> + <xref href="impala_security.xml#security">Security for Impala</xref> + </li> + + <li> + <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html" scope="external" format="html">CDH + Version and Packaging Information</xref> + </li> + </ul> + </conbody> +</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_abort_on_default_limit_exceeded.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_abort_on_default_limit_exceeded.xml b/docs/topics/impala_abort_on_default_limit_exceeded.xml new file mode 100644 index 0000000..5b159a7 --- /dev/null +++ b/docs/topics/impala_abort_on_default_limit_exceeded.xml @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="obwl" id="abort_on_default_limit_exceeded"> + + <title>ABORT_ON_DEFAULT_LIMIT_EXCEEDED Query Option</title> + <titlealts audience="PDF"><navtitle>ABORT_ON_DEFAULT_LIMIT_EXCEEDED</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/obwl_query_options"/> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_abort_on_error.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_abort_on_error.xml b/docs/topics/impala_abort_on_error.xml new file mode 100644 index 0000000..950f8a4 --- /dev/null +++ b/docs/topics/impala_abort_on_error.xml @@ -0,0 +1,44 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="abort_on_error"> + + <title>ABORT_ON_ERROR Query Option</title> + <titlealts audience="PDF"><navtitle>ABORT_ON_ERROR</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data 
name="Category" value="Impala Query Options"/> + <data name="Category" value="Troubleshooting"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">ABORT_ON_ERROR query option</indexterm> + When this option is enabled, Impala cancels a query immediately when any of the nodes encounters an error, + rather than continuing and possibly returning incomplete results. This option is disabled by default, to help + gather maximum diagnostic information when an error occurs, for example, whether the same problem occurred on + all nodes or only a single node. Currently, the errors that Impala can skip over involve data corruption, + such as a column that contains a string value when expected to contain an integer value. + </p> + + <p> + To control how much logging Impala does for non-fatal errors when <codeph>ABORT_ON_ERROR</codeph> is turned + off, use the <codeph>MAX_ERRORS</codeph> option. 
+ </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + <p> + <xref href="impala_max_errors.xml#max_errors"/>, + <xref href="impala_logging.xml#logging"/> + </p> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_admin.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_admin.xml b/docs/topics/impala_admin.xml new file mode 100644 index 0000000..3da7d5f --- /dev/null +++ b/docs/topics/impala_admin.xml @@ -0,0 +1,60 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="admin"> + + <title>Impala Administration</title> + <titlealts audience="PDF"><navtitle>Administration</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Administrators"/> + <!-- Although there is a reasonable amount of info on the page, it could be better to use wiki-style embedding instead of linking hither and thither. --> + <data name="Category" value="Stub Pages"/> + </metadata> + </prolog> + + <conbody> + + <p> + As an administrator, you monitor Impala's use of resources and take action when necessary to keep Impala + running smoothly and avoid conflicts with other Hadoop components running on the same cluster. When you + detect that an issue has happened or could happen in the future, you reconfigure Impala or other components + such as HDFS or even the hardware of the cluster itself to resolve or avoid problems. + </p> + + <p outputclass="toc"/> + + <p> + <b>Related tasks:</b> + </p> + + <p> + As an administrator, you can expect to perform installation, upgrade, and configuration tasks for Impala on + all machines in a cluster. 
See <xref href="impala_install.xml#install"/>, + <xref href="impala_upgrading.xml#upgrading"/>, and <xref href="impala_config.xml#config"/> for details. + </p> + + <p> + For security tasks typically performed by administrators, see <xref href="impala_security.xml#security"/>. + </p> + + <p> + Administrators also decide how to allocate cluster resources so that all Hadoop components can run smoothly + together. For Impala, this task primarily involves: + <ul> + <li> + Deciding how many Impala queries can run concurrently and with how much memory, through the admission + control feature. See <xref href="impala_admission.xml#admission_control"/> for details. + </li> + + <li> + Dividing cluster resources such as memory between Impala and other components, using YARN for overall + resource management, and Llama to mediate resource requests from Impala to YARN. See + <xref href="impala_resource_management.xml#resource_management"/> for details. + </li> + </ul> + </p> + +<!-- <p conref="../shared/impala_common.xml#common/impala_mr"/> --> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_admission.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_admission.xml b/docs/topics/impala_admission.xml new file mode 100644 index 0000000..ad53823 --- /dev/null +++ b/docs/topics/impala_admission.xml @@ -0,0 +1,947 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="1.3.0" id="admission_control"> + + <title>Admission Control and Query Queuing</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Admission Control"/> + <data name="Category" value="Resource Management"/> + </metadata> + </prolog> + + <conbody> + + <p id="admission_control_intro"> + Admission control is an Impala feature that 
imposes limits on concurrent SQL queries, to avoid resource usage + spikes and out-of-memory conditions on busy CDH clusters. + It is a form of <q>throttling</q>. + New queries are accepted and executed until + certain conditions are met, such as too many queries or too much + total memory used across the cluster. + When one of these thresholds is reached, + incoming queries wait to begin execution. These queries are + queued and are admitted (that is, begin executing) when the resources become available. + </p> + <p> + In addition to the threshold values for currently executing queries, + you can place limits on the maximum number of queries that are + queued (waiting) and a limit on the amount of time they might wait + before returning with an error. These queue settings let you ensure that queries do + not wait indefinitely, so that you can detect and correct <q>starvation</q> scenarios. + </p> + <p> + Enable this feature if your cluster is + underutilized at some times and overutilized at others. Overutilization is indicated by performance + bottlenecks and queries being cancelled due to out-of-memory conditions, when those same queries are + successful and perform well during times with less concurrent load. Admission control works as a safeguard to + avoid out-of-memory conditions during heavy concurrent usage. + </p> + + <note conref="../shared/impala_common.xml#common/impala_llama_obsolete"/> + + <p outputclass="toc inpage"/> + </conbody> + + <concept id="admission_intro"> + + <title>Overview of Impala Admission Control</title> + <prolog> + <metadata> + <data name="Category" value="Concepts"/> + </metadata> + </prolog> + + <conbody> + + <p> + On a busy CDH cluster, you might find there is an optimal number of Impala queries that run concurrently. + For example, when the I/O capacity is fully utilized by I/O-intensive queries, + you might not find any throughput benefit in running more concurrent queries. 
+ By allowing some queries to run at full speed while others wait, rather than having + all queries contend for resources and run slowly, admission control can result in higher overall throughput. + </p> + + <p> + For another example, consider a memory-bound workload such as many large joins or aggregation queries. + Each such query could briefly use many gigabytes of memory to process intermediate results. + Because Impala by default cancels queries that exceed the specified memory limit, + running multiple large-scale queries at once might require + re-running some queries that are cancelled. In this case, admission control improves the + reliability and stability of the overall workload by only allowing as many concurrent queries + as the overall memory of the cluster can accommodate. + </p> + + <p> + The admission control feature lets you set an upper limit on the number of concurrent Impala + queries and on the memory used by those queries. Any additional queries are queued until the earlier ones + finish, rather than being cancelled or running slowly and causing contention. As other queries finish, the + queued queries are allowed to proceed. + </p> + + <p rev="2.5.0"> + In <keyword keyref="impala25_full"/> and higher, you can specify these limits and thresholds for each + pool rather than globally. That way, you can balance the resource usage and throughput + between steady well-defined workloads, rare resource-intensive queries, and ad hoc + exploratory queries. + </p> + + <p> + For details on the internal workings of admission control, see + <xref href="impala_admission.xml#admission_architecture"/>. + </p> + </conbody> + </concept> + + <concept id="admission_concurrency"> + <title>Concurrent Queries and Admission Control</title> + <conbody> + <p> + One way to limit resource usage through admission control is to set an upper limit + on the number of concurrent queries. 
This is the initial technique you might use + when you do not have extensive information about memory usage for your workload. + This setting can be specified separately for each dynamic resource pool. + </p> + <p> + You can combine this setting with the memory-based approach described in + <xref href="impala_admission.xml#admission_memory"/>. If either the maximum number of + or the expected memory usage of the concurrent queries is exceeded, subsequent queries + are queued until the concurrent workload falls below the threshold again. + </p> + <p> + See + <xref audience="integrated" href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_resource_pools.html" scope="external" format="html"/> + for information about all these dynamic resource + pool settings, how to use them together, and how to divide different parts of your workload among + different pools. + </p> + </conbody> + </concept> + + <concept id="admission_memory"> + <title>Memory Limits and Admission Control</title> + <conbody> + <p> + Each dynamic resource pool can have an upper limit on the cluster-wide memory used by queries executing in that pool. + This is the technique to use once you have a stable workload with well-understood memory requirements. + </p> + <p> + Always specify the <uicontrol>Default Query Memory Limit</uicontrol> for the expected maximum amount of RAM + that a query might require on each host, which is equivalent to setting the <codeph>MEM_LIMIT</codeph> + query option for every query run in that pool. That value affects the execution of each query, preventing it + from overallocating memory on each host, and potentially activating the spill-to-disk mechanism or cancelling + the query when necessary. 
+ </p> + <p> + Optionally, specify the <uicontrol>Max Memory</uicontrol> setting, a cluster-wide limit that determines + how many queries can be safely run concurrently, based on the upper memory limit per host multiplied by the + number of Impala nodes in the cluster. + </p> + <p conref="../shared/impala_common.xml#common/admission_control_mem_limit_interaction"/> + <note conref="../shared/impala_common.xml#common/max_memory_default_limit_caveat"/> + <p> + You can combine the memory-based settings with the upper limit on concurrent queries described in + <xref href="impala_admission.xml#admission_concurrency"/>. If either the maximum number of + or the expected memory usage of the concurrent queries is exceeded, subsequent queries + are queued until the concurrent workload falls below the threshold again. + </p> + <p> + See + <xref audience="integrated" href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_resource_pools.html" scope="external" format="html"/> + for information about all these dynamic resource + pool settings, how to use them together, and how to divide different parts of your workload among + different pools. + </p> + </conbody> + </concept> + + <concept id="admission_yarn"> + + <title>How Impala Admission Control Relates to Other Resource Management Tools</title> + <prolog> + <metadata> + <data name="Category" value="Concepts"/> + </metadata> + </prolog> + + <conbody> + + <p> + The admission control feature is similar in some ways to the Cloudera Manager + static partitioning feature, as well as the YARN resource management framework. These features + can be used separately or together. This section describes some similarities and differences, to help you + decide which combination of resource management features to use for Impala. 
+ </p> + + <p> + Admission control is a lightweight, decentralized system that is suitable for workloads consisting + primarily of Impala queries and other SQL statements. It sets <q>soft</q> limits that smooth out Impala + memory usage during times of heavy load, rather than taking an all-or-nothing approach that cancels jobs + that are too resource-intensive. + </p> + + <p> + Because the admission control system does not interact with other Hadoop workloads such as MapReduce jobs, you + might use YARN with static service pools on CDH 5 clusters where resources are shared between + Impala and other Hadoop components. This configuration is recommended when using Impala in a + <term>multitenant</term> cluster. Devote a percentage of cluster resources to Impala, and allocate another + percentage for MapReduce and other batch-style workloads. Let admission control handle the concurrency and + memory usage for the Impala work within the cluster, and let YARN manage the work for other components within the + cluster. In this scenario, Impala's resources are not managed by YARN. + </p> + + <p> + The Impala admission control feature uses the same configuration mechanism as the YARN resource manager to map users to + pools and authenticate them. + </p> + + <p rev="DOCS-648"> + Although the Impala admission control feature uses a <codeph>fair-scheduler.xml</codeph> configuration file + behind the scenes, this file does not depend on which scheduler is used for YARN. You still use this file, + and Cloudera Manager can generate it for you, even when YARN is using the capacity scheduler. 
+ </p> + + </conbody> + </concept> + + <concept id="admission_architecture"> + + <title>How Impala Schedules and Enforces Limits on Concurrent Queries</title> + <prolog> + <metadata> + <data name="Category" value="Concepts"/> + <data name="Category" value="Scheduling"/> + </metadata> + </prolog> + + <conbody> + + <p> + The admission control system is decentralized, embedded in each Impala daemon and communicating through the + statestore mechanism. Although the limits you set for memory usage and number of concurrent queries apply + cluster-wide, each Impala daemon makes its own decisions about whether to allow each query to run + immediately or to queue it for a less-busy time. These decisions are fast, meaning the admission control + mechanism is low-overhead, but might be imprecise during times of heavy load across many coordinators. There could be times when + more queries were queued (in aggregate across the cluster) than the specified limit, or when the number of admitted queries + exceeds the expected number. Thus, you typically err on the + high side for the size of the queue, because there is not a big penalty for having a large number of queued + queries; and you typically err on the low side for configuring memory resources, to leave some headroom in case more + queries are admitted than expected, without running out of memory and being cancelled as a result. + </p> + +<!-- Commenting out as redundant. + <p> + The limit on the number of concurrent queries is a <q>soft</q> one, To achieve high throughput, Impala + makes quick decisions at the host level about which queued queries to dispatch. Therefore, Impala might + slightly exceed the limits from time to time. + </p> +--> + + <p> + To avoid a large backlog of queued requests, you can set an upper limit on the size of the queue for + queries that are queued. When the number of queued queries exceeds this limit, further queries are + cancelled rather than being queued. 
You can also configure a timeout period per pool, after which queued queries are + cancelled, to avoid indefinite waits. If a cluster reaches this state where queries are cancelled due to + too many concurrent requests or long waits for query execution to begin, that is a signal for an + administrator to take action, either by provisioning more resources, scheduling work on the cluster to + smooth out the load, or by doing <xref href="impala_performance.xml#performance">Impala performance + tuning</xref> to enable higher throughput. + </p> + </conbody> + </concept> + + <concept id="admission_jdbc_odbc"> + + <title>How Admission Control works with Impala Clients (JDBC, ODBC, HiveServer2)</title> + <prolog> + <metadata> + <data name="Category" value="JDBC"/> + <data name="Category" value="ODBC"/> + <data name="Category" value="HiveServer2"/> + <data name="Category" value="Concepts"/> + </metadata> + </prolog> + + <conbody> + + <p> + Most aspects of admission control work transparently with client interfaces such as JDBC and ODBC: + </p> + + <ul> + <li> + If a SQL statement is put into a queue rather than running immediately, the API call blocks until the + statement is dequeued and begins execution. At that point, the client program can request to fetch + results, which might also block until results become available. + </li> + + <li> + If a SQL statement is cancelled because it has been queued for too long or because it exceeded the memory + limit during execution, the error is returned to the client program with a descriptive error message. + </li> + + </ul> + + <p rev="CDH-27667"> + In Impala 2.0 and higher, you can submit + a SQL <codeph>SET</codeph> statement from the client application + to change the <codeph>REQUEST_POOL</codeph> query option. + This option lets you submit queries to different resource pools, + as described in <xref href="impala_request_pool.xml#request_pool"/>. +<!-- Commenting out as starting to be too old to mention. 
+ Prior to Impala 2.0, that option was only settable + for a session through the <cmdname>impala-shell</cmdname> <codeph>SET</codeph> command, or cluster-wide through an + <cmdname>impalad</cmdname> startup option. +--> + </p> + + <p> + At any time, the set of queued queries could include queries submitted through multiple different Impala + daemon hosts. All the queries submitted through a particular host will be executed in order, so a + <codeph>CREATE TABLE</codeph> followed by an <codeph>INSERT</codeph> on the same table would succeed. + Queries submitted through different hosts are not guaranteed to be executed in the order they were + received. Therefore, if you are using load-balancing or other round-robin scheduling where different + statements are submitted through different hosts, set up all table structures ahead of time so that the + statements controlled by the queuing system are primarily queries, where order is not significant. Or, if a + sequence of statements needs to happen in strict order (such as an <codeph>INSERT</codeph> followed by a + <codeph>SELECT</codeph>), submit all those statements through a single session, while connected to the same + Impala daemon host. + </p> + + <p> + Admission control has the following limitations or special behavior when used with JDBC or ODBC + applications: + </p> + + <ul> + <li> + The other resource-related query options, + <codeph>RESERVATION_REQUEST_TIMEOUT</codeph> and <codeph>V_CPU_CORES</codeph>, are no longer used. Those query options only + applied to using Impala with Llama, which is no longer supported. + </li> + </ul> + </conbody> + </concept> + + <concept id="admission_schema_config"> + <title>SQL and Schema Considerations for Admission Control</title> + <conbody> + <p> + When queries complete quickly and are tuned for optimal memory usage, there is less chance of + performance or capacity problems during times of heavy load. 
Before setting up admission control, + tune your Impala queries to ensure that the query plans are efficient and the memory estimates + are accurate. Understanding the nature of your workload, and which queries are the most + resource-intensive, helps you to plan how to divide the queries into different pools and + decide what limits to define for each pool. + </p> + <p> + For large tables, especially those involved in join queries, keep their statistics up to date + after loading substantial amounts of new data or adding new partitions. + Use the <codeph>COMPUTE STATS</codeph> statement for unpartitioned tables, and + <codeph>COMPUTE INCREMENTAL STATS</codeph> for partitioned tables. + </p> + <p> + When you use dynamic resource pools with a <uicontrol>Max Memory</uicontrol> setting enabled, + you typically override the memory estimates that Impala makes based on the statistics from the + <codeph>COMPUTE STATS</codeph> statement. + You either set the <codeph>MEM_LIMIT</codeph> query option within a particular session to + set an upper memory limit for queries within that session, or a default <codeph>MEM_LIMIT</codeph> + setting for all queries processed by the <cmdname>impalad</cmdname> instance, or + a default <codeph>MEM_LIMIT</codeph> setting for all queries assigned to a particular + dynamic resource pool. By designating a consistent memory limit for a set of similar queries + that use the same resource pool, you avoid unnecessary query queuing or out-of-memory conditions + that can arise during high-concurrency workloads when memory estimates for some queries are inaccurate. + </p> + <p> + Follow other steps from <xref href="impala_performance.xml#performance"/> to tune your queries. 
+ </p> + </conbody> + </concept> + + + <concept id="admission_config"> + + <title>Configuring Admission Control</title> + <prolog> + <metadata> + <data name="Category" value="Configuring"/> + </metadata> + </prolog> + + <conbody> + + <p> + The configuration options for admission control range from the simple (a single resource pool with a single + set of options) to the complex (multiple resource pools with different options, each pool handling queries + for a different set of users and groups). <ph rev="upstream">Cloudera</ph> recommends configuring the settings through the Cloudera Manager user + interface. + <!-- + , or on a system without Cloudera Manager by editing configuration files or through startup + options to the <cmdname>impalad</cmdname> daemon. + --> + </p> + +<!-- To do: reconcile the similar notes in impala_admission.xml and admin_impala_admission_control.xml + and make into a conref in both places. --> + <note type="important"> + Although the following options are still present in the Cloudera Manager interface under the + <uicontrol>Admission Control</uicontrol> configuration settings dialog, + <ph rev="upstream">Cloudera</ph> recommends you not use them in <keyword keyref="impala25_full"/> and higher. + These settings only apply if you enable admission control but leave dynamic resource pools disabled. + In <keyword keyref="impala25_full"/> and higher, prefer to set up dynamic resource pools and + customize the settings for each pool, as described in + <ph audience="integrated"><xref href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr/section_p15_mhn_2v"/> and <xref href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr/section_gph_tnk_lm"/></ph> + <xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_resource_pools.html" scope="external" format="html"/>. 
+ </note> + + <section id="admission_flags"> + + <title>Impala Service Flags for Admission Control (Advanced)</title> + + <p> + The following Impala configuration options let you adjust the settings of the admission control feature. When supplying the + options on the <cmdname>impalad</cmdname> command line, prepend the option name with <codeph>--</codeph>. + </p> + + <dl id="admission_control_option_list"> + <dlentry id="queue_wait_timeout_ms"> + <dt> + <codeph>queue_wait_timeout_ms</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--queue_wait_timeout_ms</indexterm> + <b>Purpose:</b> Maximum amount of time (in milliseconds) that a + request waits to be admitted before timing out. + <p> + <b>Type:</b> <codeph>int64</codeph> + </p> + <p> + <b>Default:</b> <codeph>60000</codeph> + </p> + </dd> + </dlentry> + <dlentry id="default_pool_max_requests"> + <dt> + <codeph>default_pool_max_requests</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--default_pool_max_requests</indexterm> + <b>Purpose:</b> Maximum number of concurrent outstanding requests + allowed to run before incoming requests are queued. Because this + limit applies cluster-wide, but each Impala node makes independent + decisions to run queries immediately or queue them, it is a soft + limit; the overall number of concurrent queries might be slightly + higher during times of heavy load. A negative value indicates no + limit. Ignored if <codeph>fair_scheduler_config_path</codeph> and + <codeph>llama_site_path</codeph> are set. 
<p> + <b>Type:</b> + <codeph>int64</codeph> + </p> + <p> + <b>Default:</b> + <ph rev="2.5.0">-1, meaning unlimited (prior to <keyword keyref="impala25_full"/> the default was 200)</ph> + </p> + </dd> + </dlentry> + <dlentry id="default_pool_max_queued"> + <dt> + <codeph>default_pool_max_queued</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--default_pool_max_queued</indexterm> + <b>Purpose:</b> Maximum number of requests allowed to be queued + before rejecting requests. Because this limit applies + cluster-wide, but each Impala node makes independent decisions to + run queries immediately or queue them, it is a soft limit; the + overall number of queued queries might be slightly higher during + times of heavy load. A negative value or 0 indicates requests are + always rejected once the maximum concurrent requests are + executing. Ignored if <codeph>fair_scheduler_config_path</codeph> + and <codeph>llama_site_path</codeph> are set. <p> + <b>Type:</b> + <codeph>int64</codeph> + </p> + <p> + <b>Default:</b> + <ph rev="2.5.0">unlimited</ph> + </p> + </dd> + </dlentry> + <dlentry id="default_pool_mem_limit"> + <dt> + <codeph>default_pool_mem_limit</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--default_pool_mem_limit</indexterm> + <b>Purpose:</b> Maximum amount of memory (across the entire + cluster) that all outstanding requests in this pool can use before + new requests to this pool are queued. Specified in bytes, + megabytes, or gigabytes by a number followed by the suffix + <codeph>b</codeph> (optional), <codeph>m</codeph>, or + <codeph>g</codeph>, either uppercase or lowercase. You can + specify floating-point values for megabytes and gigabytes, to + represent fractional numbers such as <codeph>1.5</codeph>. You can + also specify it as a percentage of the physical memory by + specifying the suffix <codeph>%</codeph>. 0 or no setting + indicates no limit. Defaults to bytes if no unit is given. 
Because + this limit applies cluster-wide, but each Impala node makes + independent decisions to run queries immediately or queue them, it + is a soft limit; the overall memory used by concurrent queries + might be slightly higher during times of heavy load. Ignored if + <codeph>fair_scheduler_config_path</codeph> and + <codeph>llama_site_path</codeph> are set. <note + conref="../shared/impala_common.xml#common/admission_compute_stats" /> + <p conref="../shared/impala_common.xml#common/type_string" /> + <p> + <b>Default:</b> + <codeph>""</codeph> (empty string, meaning unlimited) </p> + </dd> + </dlentry> + <!-- Possibly from here on down, command-line controls not applicable to CM. --> + <dlentry id="disable_admission_control"> + <dt> + <codeph>disable_admission_control</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--disable_admission_control</indexterm> + <b>Purpose:</b> Turns off the admission control feature entirely, + regardless of other configuration option settings. + <p> + <b>Type:</b> Boolean </p> + <p> + <b>Default:</b> + <codeph>false</codeph> + </p> + </dd> + </dlentry> + <dlentry id="disable_pool_max_requests"> + <dt> + <codeph>disable_pool_max_requests</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--disable_pool_max_requests</indexterm> + <b>Purpose:</b> Disables all per-pool limits on the maximum number + of running requests. <p> + <b>Type:</b> Boolean </p> + <p> + <b>Default:</b> + <codeph>false</codeph> + </p> + </dd> + </dlentry> + <dlentry id="disable_pool_mem_limits"> + <dt> + <codeph>disable_pool_mem_limits</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--disable_pool_mem_limits</indexterm> + <b>Purpose:</b> Disables all per-pool mem limits. 
<p> + <b>Type:</b> Boolean </p> + <p> + <b>Default:</b> + <codeph>false</codeph> + </p> + </dd> + </dlentry> + <dlentry id="fair_scheduler_allocation_path"> + <dt> + <codeph>fair_scheduler_allocation_path</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--fair_scheduler_allocation_path</indexterm> + <b>Purpose:</b> Path to the fair scheduler allocation file + (<codeph>fair-scheduler.xml</codeph>). <p + conref="../shared/impala_common.xml#common/type_string" /> + <p> + <b>Default:</b> + <codeph>""</codeph> (empty string) </p> + <p> + <b>Usage notes:</b> Admission control only uses a small subset + of the settings that can go in this file, as described below. + For details about all the Fair Scheduler configuration settings, + see the <xref + href="http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html#Configuration" + scope="external" format="html">Apache wiki</xref>. </p> + </dd> + </dlentry> + <dlentry id="llama_site_path"> + <dt> + <codeph>llama_site_path</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--llama_site_path</indexterm> + <b>Purpose:</b> Path to the configuration file used by admission control + (<codeph>llama-site.xml</codeph>). If set, + <codeph>fair_scheduler_allocation_path</codeph> must also be set. + <p conref="../shared/impala_common.xml#common/type_string" /> + <p> + <b>Default:</b> <codeph>""</codeph> (empty string) </p> + <p> + <b>Usage notes:</b> Admission control only uses a few + of the settings that can go in this file, as described below. + </p> + </dd> + </dlentry> + </dl> + </section> + </conbody> + + <concept id="admission_config_cm"> + +<!-- TK: Maybe all this stuff overlaps with admin_impala_admission_control and can be delegated there. 
--> + + <title>Configuring Admission Control Using Cloudera Manager</title> + <prolog> + <metadata> + <data name="Category" value="Cloudera Manager"/> + </metadata> + </prolog> + + <conbody> + + <p> + In Cloudera Manager, you can configure pools to manage queued Impala queries, and the options for the + limit on number of concurrent queries and how to handle queries that exceed the limit. For details, see + <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_managing_resources.html" scope="external" format="html">Managing Resources with Cloudera Manager</xref>. + </p> + + <p audience="Cloudera"><!-- Hiding link because that subtopic is now hidden. --> + See <xref href="#admission_examples"/> for a sample setup for admission control under + Cloudera Manager. + </p> + </conbody> + </concept> + + <concept id="admission_config_noncm"> + + <title>Configuring Admission Control Using the Command Line</title> + + <conbody> + + <p> + If you do not use Cloudera Manager, you use a combination of startup options for the Impala daemon, and + optionally editing or manually constructing the configuration files + <filepath>fair-scheduler.xml</filepath> and <filepath>llama-site.xml</filepath>. + </p> + + <p> + For a straightforward configuration using a single resource pool named <codeph>default</codeph>, you can + specify configuration options on the command line and skip the <filepath>fair-scheduler.xml</filepath> + and <filepath>llama-site.xml</filepath> configuration files. + </p> + + <p> + For an advanced configuration with multiple resource pools using different settings, set up the + <filepath>fair-scheduler.xml</filepath> and <filepath>llama-site.xml</filepath> configuration files + manually. Provide the paths to each one using the <cmdname>impalad</cmdname> command-line options, + <codeph>--fair_scheduler_allocation_path</codeph> and <codeph>--llama_site_path</codeph> respectively. 
+ </p>
+
+ <p>
+ The Impala admission control feature only uses the Fair Scheduler configuration settings to determine how
+ to map users and groups to different resource pools. For example, you might set up different resource
+ pools with separate memory limits, and maximum number of concurrent and queued queries, for different
+ categories of users within your organization. For details about all the Fair Scheduler configuration
+ settings, see the
+ <xref href="http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html#Configuration" scope="external" format="html">Apache
+ wiki</xref>.
+ </p>
+
+ <p>
+ The Impala admission control feature only uses a small subset of possible settings from the
+ <filepath>llama-site.xml</filepath> configuration file:
+ </p>
+
+<codeblock>llama.am.throttling.maximum.placed.reservations.<varname>queue_name</varname>
+llama.am.throttling.maximum.queued.reservations.<varname>queue_name</varname>
+<ph rev="2.5.0 IMPALA-2538">impala.admission-control.pool-default-query-options.<varname>queue_name</varname>
+impala.admission-control.pool-queue-timeout-ms.<varname>queue_name</varname></ph>
+</codeblock>
+
+ <p rev="2.5.0 IMPALA-2538">
+ The <codeph>impala.admission-control.pool-queue-timeout-ms</codeph>
+ setting specifies the timeout value for this pool, in milliseconds.
+ The <codeph>impala.admission-control.pool-default-query-options</codeph>
+ setting designates the default query options for all queries that run
+ in this pool. Its argument value is a comma-delimited string of
+ 'key=value' pairs, for example, <codeph>'key1=val1,key2=val2'</codeph>.
+ For example, this is where you might set a default memory limit
+ for all queries in the pool, using an argument such as <codeph>MEM_LIMIT=5G</codeph>.
+ </p>
+
+ <p rev="2.5.0 IMPALA-2538">
+ The <codeph>impala.admission-control.*</codeph> configuration settings are available in
+ <keyword keyref="impala25_full"/> and higher. 
+ </p> + + <p audience="Cloudera"><!-- Hiding link because that subtopic is now hidden. --> + See <xref href="#admission_examples/section_etq_qgb_rq"/> for sample configuration files + for admission control using multiple resource pools, without Cloudera Manager. + </p> + </conbody> + </concept> + + <concept id="admission_examples"> + <!-- Pruning the CM examples and screenshots because in Impala 2.5 the defaults match up much better with our recommendations. --> + + <title>Examples of Admission Control Configurations</title> + + <conbody> + + <section id="section_fqn_qgb_rq"> + + <title>Example Admission Control Configurations Using Cloudera Manager</title> + + <p> + For full instructions about configuring dynamic resource pools through Cloudera Manager, see + <xref audience="integrated" href="cm_mc_resource_pools.xml#xd_583c10bfdbd326ba--43d5fd93-1410993f8c2--7ff2"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_resource_pools.html" scope="external" format="html"/>. + </p> + + </section> + + <section id="section_etq_qgb_rq"> + + <title>Example Admission Control Configurations Using Configuration Files</title> + + <p> + For clusters not managed by Cloudera Manager, here are sample <filepath>fair-scheduler.xml</filepath> + and <filepath>llama-site.xml</filepath> files that define resource pools <codeph>root.default</codeph>, + <codeph>root.development</codeph>, and <codeph>root.production</codeph>. + These sample files are stripped down: in a real deployment they + might contain other settings for use with various aspects of the YARN component. The + settings shown here are the significant ones for the Impala admission control feature. + </p> + + <p> + <b>fair-scheduler.xml:</b> + </p> + + <p> + Although Impala does not use the <codeph>vcores</codeph> value, you must still specify it to satisfy + YARN requirements for the file contents. 
+ </p> + + <p> + Each <codeph><aclSubmitApps></codeph> tag (other than the one for <codeph>root</codeph>) contains + a comma-separated list of users, then a space, then a comma-separated list of groups; these are the + users and groups allowed to submit Impala statements to the corresponding resource pool. + </p> + + <p> + If you leave the <codeph><aclSubmitApps></codeph> element empty for a pool, nobody can submit + directly to that pool; child pools can specify their own <codeph><aclSubmitApps></codeph> values + to authorize users and groups to submit to those pools. + </p> + +<codeblock><![CDATA[<allocations> + <queue name="root"> + <aclSubmitApps> </aclSubmitApps> + <queue name="default"> + <maxResources>50000 mb, 0 vcores</maxResources> + <aclSubmitApps>*</aclSubmitApps> + </queue> + <queue name="development"> + <maxResources>200000 mb, 0 vcores</maxResources> + <aclSubmitApps>user1,user2 dev,ops,admin</aclSubmitApps> + </queue> + <queue name="production"> + <maxResources>1000000 mb, 0 vcores</maxResources> + <aclSubmitApps> ops,admin</aclSubmitApps> + </queue> + </queue> + <queuePlacementPolicy> + <rule name="specified" create="false"/> + <rule name="default" /> + </queuePlacementPolicy> +</allocations> +]]> +</codeblock> + + <p> + <b>llama-site.xml:</b> + </p> + +<codeblock rev="2.5.0 IMPALA-2538"><![CDATA[ +<?xml version="1.0" encoding="UTF-8"?> +<configuration> + <property> + <name>llama.am.throttling.maximum.placed.reservations.root.default</name> + <value>10</value> + </property> + <property> + <name>llama.am.throttling.maximum.queued.reservations.root.default</name> + <value>50</value> + </property> + <property> + <name>impala.admission-control.pool-default-query-options.root.default</name> + <value>mem_limit=128m,query_timeout_s=20,max_io_buffers=10</value> + </property> + <property> + <name>impala.admission-control.pool-queue-timeout-ms.root.default</name> + <value>30000</value> + </property> + <property> + 
<name>llama.am.throttling.maximum.placed.reservations.root.development</name>
+ <value>50</value>
+ </property>
+ <property>
+ <name>llama.am.throttling.maximum.queued.reservations.root.development</name>
+ <value>100</value>
+ </property>
+ <property>
+ <name>impala.admission-control.pool-default-query-options.root.development</name>
+ <value>mem_limit=256m,query_timeout_s=30,max_io_buffers=10</value>
+ </property>
+ <property>
+ <name>impala.admission-control.pool-queue-timeout-ms.root.development</name>
+ <value>15000</value>
+ </property>
+ <property>
+ <name>llama.am.throttling.maximum.placed.reservations.root.production</name>
+ <value>100</value>
+ </property>
+ <property>
+ <name>llama.am.throttling.maximum.queued.reservations.root.production</name>
+ <value>200</value>
+ </property>
+<!--
+ Default query options for the 'root.production' pool.
+ THIS IS A NEW PARAMETER in CDH 5.7 / Impala 2.5.
+ Note that the MEM_LIMIT query option still shows up in here even though it is a
+ separate box in the UI. We do that because it is the most important query option
+ that people will need (everything else is somewhat advanced).
+
+ MEM_LIMIT takes a per-node memory limit which is specified using one of the following:
+ - '<int>[bB]?' -> bytes (default if no unit given)
+ - '<float>[mM(bB)]' -> megabytes
+ - '<float>[gG(bB)]' -> in gigabytes
+ E.g. 'MEM_LIMIT=12345' (no unit) means 12345 bytes, and you can append m or g
+ to specify megabytes or gigabytes, though that is not required.
+-->
+ <property>
+ <name>impala.admission-control.pool-default-query-options.root.production</name>
+ <value>mem_limit=386m,query_timeout_s=30,max_io_buffers=10</value>
+ </property>
+<!--
+ Default queue timeout (ms) for the pool 'root.production'.
+ If this isn't set, the process-wide flag is used.
+ THIS IS A NEW PARAMETER in CDH 5.7 / Impala 2.5. 
+--> + <property> + <name>impala.admission-control.pool-queue-timeout-ms.root.production</name> + <value>30000</value> + </property> +</configuration> +]]> +</codeblock> + </section> + </conbody> + </concept> + </concept> + +<!-- End Config --> + + <concept id="admission_guidelines"> + + <title>Guidelines for Using Admission Control</title> + <prolog> + <metadata> + <data name="Category" value="Planning"/> + <data name="Category" value="Guidelines"/> + <data name="Category" value="Best Practices"/> + </metadata> + </prolog> + + <conbody> + + <p> + To see how admission control works for particular queries, examine the profile output for the query. This + information is available through the <codeph>PROFILE</codeph> statement in <cmdname>impala-shell</cmdname> + immediately after running a query in the shell, on the <uicontrol>queries</uicontrol> page of the Impala + debug web UI, or in the Impala log file (basic information at log level 1, more detailed information at log + level 2). The profile output contains details about the admission decision, such as whether the query was + queued or not and which resource pool it was assigned to. It also includes the estimated and actual memory + usage for the query, so you can fine-tune the configuration for the memory limits of the resource pools. + </p> + + <p> + Where practical, use Cloudera Manager to configure the admission control parameters. The Cloudera Manager + GUI is much simpler than editing the configuration files directly. + </p> + + <p> + Remember that the limits imposed by admission control are <q>soft</q> limits. + The decentralized nature of this mechanism means that each Impala node makes its own decisions about whether + to allow queries to run immediately or to queue them. These decisions rely on information passed back and forth + between nodes by the statestore service. 
If a sudden surge in requests causes more queries than anticipated to run + concurrently, then throughput could decrease due to queries spilling to disk or contending for resources; + or queries could be cancelled if they exceed the <codeph>MEM_LIMIT</codeph> setting while running. + </p> + +<!-- + <p> + If you have trouble getting a query to run because its estimated memory usage is too high, you can override + the estimate by setting the <codeph>MEM_LIMIT</codeph> query option in <cmdname>impala-shell</cmdname>, + then issuing the query through the shell in the same session. The <codeph>MEM_LIMIT</codeph> value is + treated as the estimated amount of memory, overriding the estimate that Impala would generate based on + table and column statistics. This value is used only for making admission control decisions, and is not + pre-allocated by the query. + </p> +--> + + <p> + In <cmdname>impala-shell</cmdname>, you can also specify which resource pool to direct queries to by + setting the <codeph>REQUEST_POOL</codeph> query option. + </p> + + <p> + The statements affected by the admission control feature are primarily queries, but also include statements + that write data such as <codeph>INSERT</codeph> and <codeph>CREATE TABLE AS SELECT</codeph>. Most write + operations in Impala are not resource-intensive, but inserting into a Parquet table can require substantial + memory due to buffering intermediate data before writing out each Parquet data block. See + <xref href="impala_parquet.xml#parquet_etl"/> for instructions about inserting data efficiently into + Parquet tables. + </p> + + <p> + Although admission control does not scrutinize memory usage for other kinds of DDL statements, if a query + is queued due to a limit on concurrent queries or memory usage, subsequent statements in the same session + are also queued so that they are processed in the correct order: + </p> + +<codeblock>-- This query could be queued to avoid out-of-memory at times of heavy load. 
+select * from huge_table join enormous_table using (id); +-- If so, this subsequent statement in the same session is also queued +-- until the previous statement completes. +drop table huge_table; +</codeblock> + + <p> + If you set up different resource pools for different users and groups, consider reusing any classifications + you developed for use with Sentry security. See <xref href="impala_authorization.xml#authorization"/> for details. + </p> + + <p> + For details about all the Fair Scheduler configuration settings, see + <xref href="https://archive.cloudera.com/cdh5/cdh/5/hadoop/hadoop-yarn/hadoop-yarn-site/FairScheduler.html#Configuration" scope="external" format="html">Fair + Scheduler Configuration</xref>, in particular the tags such as <codeph><queue></codeph> and + <codeph><aclSubmitApps></codeph> to map users and groups to particular resource pools (queues). + </p> + +<!-- Wait a sec. We say admission control doesn't use RESERVATION_REQUEST_TIMEOUT at all. + What's the real story here? Matt did refer to some timeout option that was + available through the shell but not the DB-centric APIs. +<p> + Because you cannot override query options such as + <codeph>RESERVATION_REQUEST_TIMEOUT</codeph> + in a JDBC or ODBC application, consider configuring timeout periods + on the application side to cancel queries that take + too long due to being queued during times of high load. 
+</p> +--> + </conbody> + </concept> +</concept> +<!-- Admission control --> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_aggregate_functions.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_aggregate_functions.xml b/docs/topics/impala_aggregate_functions.xml new file mode 100644 index 0000000..66d617a --- /dev/null +++ b/docs/topics/impala_aggregate_functions.xml @@ -0,0 +1,33 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="aggregate_functions"> + + <title>Impala Aggregate Functions</title> + <titlealts audience="PDF"><navtitle>Aggregate Functions</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Functions"/> + <data name="Category" value="Aggregate Functions"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Querying"/> + </metadata> + </prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/aggr1"/> + +<codeblock conref="../shared/impala_common.xml#common/aggr2"/> + + <p conref="../shared/impala_common.xml#common/aggr3"/> + + <p> + <indexterm audience="Cloudera">aggregate functions</indexterm> + </p> + + <p outputclass="toc"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_aliases.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_aliases.xml b/docs/topics/impala_aliases.xml new file mode 100644 index 0000000..fd686cd --- /dev/null +++ b/docs/topics/impala_aliases.xml @@ -0,0 +1,87 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="aliases"> + + <title>Overview of Impala 
Aliases</title> + <titlealts audience="PDF"><navtitle>Aliases</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + When you write the names of tables, columns, or column expressions in a query, you can assign an alias at the + same time. Then you can specify the alias rather than the original name when making other references to the + table or column in the same statement. You typically specify aliases that are shorter, easier to remember, or + both than the original names. The aliases are printed in the query header, making them useful for + self-documenting output. + </p> + + <p> + To set up an alias, add the <codeph>AS <varname>alias</varname></codeph> clause immediately after any table, + column, or expression name in the <codeph>SELECT</codeph> list or <codeph>FROM</codeph> list of a query. The + <codeph>AS</codeph> keyword is optional; you can also specify the alias immediately after the original name. + </p> + +<codeblock>-- Make the column headers of the result set easier to understand. +SELECT c1 AS name, c2 AS address, c3 AS phone FROM table_with_terse_columns; +SELECT SUM(ss_xyz_dollars_net) AS total_sales FROM table_with_cryptic_columns; +-- The alias can be a quoted string for extra readability. +SELECT c1 AS "Employee ID", c2 AS "Date of hire" FROM t1; +-- The AS keyword is optional. +SELECT c1 "Employee ID", c2 "Date of hire" FROM t1; + +-- The table aliases assigned in the FROM clause can be used both earlier +-- in the query (the SELECT list) and later (the WHERE clause). 
+SELECT one.name, two.address, three.phone + FROM census one, building_directory two, phonebook three +WHERE one.id = two.id and two.id = three.id; + +-- The aliases c1 and c2 let the query handle columns with the same names from 2 joined tables. +-- The aliases t1 and t2 let the query abbreviate references to long or cryptically named tables. +SELECT t1.column_n AS c1, t2.column_n AS c2 FROM long_name_table AS t1, very_long_name_table2 AS t2 + WHERE c1 = c2; +SELECT t1.column_n c1, t2.column_n c2 FROM table1 t1, table2 t2 + WHERE c1 = c2; +</codeblock> + + <p> + To use an alias name that matches one of the Impala reserved keywords (listed in + <xref href="impala_reserved_words.xml#reserved_words"/>), surround the identifier with either single or + double quotation marks, or <codeph>``</codeph> characters (backticks). + </p> + + <p> + <ph conref="../shared/impala_common.xml#common/aliases_vs_identifiers"/> + </p> + + <p conref="../shared/impala_common.xml#common/complex_types_blurb"/> + + <p rev="2.3.0"> + Queries involving the complex types (<codeph>ARRAY</codeph>, + <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>), typically make + extensive use of table aliases. These queries involve join clauses + where the complex type column is treated as a joined table. + To construct two-part or three-part qualified names for the + complex column elements in the <codeph>FROM</codeph> list, + sometimes it is syntactically required to construct a table + alias for the complex column where it is referenced in the join clause. + See <xref href="impala_complex_types.xml#complex_types"/> for details and examples. 
+ </p> + + <p> + <b>Alternatives:</b> + </p> + + <p conref="../shared/impala_common.xml#common/views_vs_identifiers"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_allow_unsupported_formats.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_allow_unsupported_formats.xml b/docs/topics/impala_allow_unsupported_formats.xml new file mode 100644 index 0000000..fbf758e --- /dev/null +++ b/docs/topics/impala_allow_unsupported_formats.xml @@ -0,0 +1,31 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="allow_unsupported_formats"> + + <title>ALLOW_UNSUPPORTED_FORMATS Query Option</title> + <titlealts audience="PDF"><navtitle>ALLOW_UNSUPPORTED_FORMATS</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Deprecated Features"/> + </metadata> + </prolog> + + <conbody> + +<!-- +The original brief explanation with not enough detail comes from the comments at: + http://github.sf.cloudera.com/CDH/Impala/raw/master/common/thrift/ImpalaService.thrift +Removing that wording from here after discussions with dev team. Just recording the URL for posterity. +--> + + <p> + An obsolete query option from early work on support for file formats. Do not use. Might be removed in the + future. 
+ </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_alter_function.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_alter_function.xml b/docs/topics/impala_alter_function.xml new file mode 100644 index 0000000..2ea0a26 --- /dev/null +++ b/docs/topics/impala_alter_function.xml @@ -0,0 +1,21 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept audience="Cloudera" rev="1.x" id="alter_function"> + + <title>ALTER FUNCTION Statement</title> + <titlealts audience="PDF"><navtitle>ALTER FUNCTION</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p/> + </conbody> +</concept>
