Update all impala* files to the latest CDH 5.9/5.10 versions. This will probably cause some degree of build breakage by undoing some hiding that Laurel did to get a clean build of the SQL Ref.
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/3c2c8f12
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/3c2c8f12
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/3c2c8f12

Branch: refs/heads/doc_prototype
Commit: 3c2c8f127664327daf3da8ffc3b61ce1da6233cc
Parents: 77e0fb4
Author: John Russell <[email protected]>
Authored: Tue Nov 1 16:10:28 2016 -0700
Committer: John Russell <[email protected]>
Committed: Tue Nov 1 16:10:28 2016 -0700

----------------------------------------------------------------------
 docs/topics/cdh_ig_parquet.xml | 20 -
 .../impala_abort_on_default_limit_exceeded.xml | 3 +
 docs/topics/impala_abort_on_error.xml | 4 +
 docs/topics/impala_admission.xml | 924 +++-
 docs/topics/impala_aggregate_functions.xml | 2 +-
 docs/topics/impala_aliases.xml | 30 +-
 .../topics/impala_allow_unsupported_formats.xml | 2 +
 docs/topics/impala_alter_table.xml | 451 +-
 docs/topics/impala_alter_view.xml | 17 +-
 docs/topics/impala_analytic_functions.xml | 47 +-
 docs/topics/impala_appx_count_distinct.xml | 8 +-
 docs/topics/impala_appx_median.xml | 6 +-
 docs/topics/impala_array.xml | 3 +
 docs/topics/impala_authorization.xml | 1599 ++++++-
 docs/topics/impala_avg.xml | 6 +-
 docs/topics/impala_avro.xml | 537 ++-
 docs/topics/impala_batch_size.xml | 5 +
 docs/topics/impala_bigint.xml | 10 +-
 docs/topics/impala_bit_functions.xml | 10 +-
 docs/topics/impala_boolean.xml | 42 +-
 docs/topics/impala_char.xml | 7 +-
 docs/topics/impala_comments.xml | 2 +
 docs/topics/impala_complex_types.xml | 34 +-
 docs/topics/impala_components.xml | 165 +-
 docs/topics/impala_compression_codec.xml | 11 +-
 docs/topics/impala_compute_stats.xml | 26 +-
 docs/topics/impala_conditional_functions.xml | 14 +-
 docs/topics/impala_conversion_functions.xml | 4 +-
 docs/topics/impala_count.xml | 10 +-
 docs/topics/impala_create_database.xml | 26 +-
 docs/topics/impala_create_function.xml | 219 +-
 docs/topics/impala_create_role.xml | 6 +-
 docs/topics/impala_create_table.xml | 282 +-
 docs/topics/impala_create_view.xml | 5 +-
 docs/topics/impala_databases.xml | 2 +-
 docs/topics/impala_datetime_functions.xml | 1163 ++++-
 docs/topics/impala_debug_action.xml | 5 +
 docs/topics/impala_decimal.xml | 137 +-
 docs/topics/impala_default_order_by_limit.xml | 3 +
 docs/topics/impala_delete.xml | 7 +-
 docs/topics/impala_describe.xml | 330 +-
 docs/topics/impala_disable_codegen.xml | 2 +
 docs/topics/impala_disable_unsafe_spills.xml | 9 +-
 docs/topics/impala_disk_space.xml | 114 +-
 docs/topics/impala_distinct.xml | 2 +
 docs/topics/impala_dml.xml | 4 +-
 docs/topics/impala_double.xml | 6 +-
 docs/topics/impala_drop_database.xml | 10 +-
 docs/topics/impala_drop_function.xml | 69 +-
 docs/topics/impala_drop_role.xml | 6 +-
 docs/topics/impala_drop_stats.xml | 72 +-
 docs/topics/impala_drop_table.xml | 32 +-
 docs/topics/impala_drop_view.xml | 3 +-
 .../impala_exec_single_node_rows_threshold.xml | 13 +-
 docs/topics/impala_explain.xml | 22 +-
 docs/topics/impala_explain_level.xml | 262 +-
 docs/topics/impala_explain_plan.xml | 552 ++-
 docs/topics/impala_float.xml | 6 +-
 docs/topics/impala_functions.xml | 10 +-
 docs/topics/impala_functions_overview.xml | 2 +-
 docs/topics/impala_grant.xml | 12 +-
 docs/topics/impala_group_by.xml | 2 +
 docs/topics/impala_group_concat.xml | 4 +-
 docs/topics/impala_hadoop.xml | 149 +-
 docs/topics/impala_having.xml | 2 +
 docs/topics/impala_hbase.xml | 887 +++-
 docs/topics/impala_hbase_cache_blocks.xml | 16 +-
 docs/topics/impala_hbase_caching.xml | 16 +-
 docs/topics/impala_hints.xml | 8 +-
 docs/topics/impala_identifiers.xml | 2 +-
 docs/topics/impala_impala_shell.xml | 89 +-
 docs/topics/impala_incompatible_changes.xml | 1504 ++++++-
 docs/topics/impala_insert.xml | 25 +-
 docs/topics/impala_install.xml | 121 +-
 docs/topics/impala_int.xml | 6 +-
 docs/topics/impala_invalidate_metadata.xml | 17 +-
 docs/topics/impala_isilon.xml | 97 +-
 docs/topics/impala_jdbc.xml | 373 +-
 docs/topics/impala_joins.xml | 16 +-
 docs/topics/impala_kudu.xml | 147 +-
 docs/topics/impala_langref.xml | 189 +-
 docs/topics/impala_langref_sql.xml | 2 +-
 docs/topics/impala_langref_unsupported.xml | 50 +-
 docs/topics/impala_limit.xml | 2 +
 docs/topics/impala_literals.xml | 2 +-
 docs/topics/impala_live_progress.xml | 13 +-
 docs/topics/impala_live_summary.xml | 10 +-
 docs/topics/impala_load_data.xml | 19 +-
 docs/topics/impala_logging.xml | 460 +-
 docs/topics/impala_map.xml | 3 +
 docs/topics/impala_math_functions.xml | 157 +-
 docs/topics/impala_max.xml | 16 +-
 docs/topics/impala_max_errors.xml | 3 +
 docs/topics/impala_max_io_buffers.xml | 3 +
 docs/topics/impala_max_scan_range_length.xml | 10 +
 docs/topics/impala_mem_limit.xml | 8 +-
 docs/topics/impala_min.xml | 12 +-
 docs/topics/impala_misc_functions.xml | 52 +-
 docs/topics/impala_ndv.xml | 8 +-
 docs/topics/impala_new_features.xml | 4006 +++++++++++++++++-
 docs/topics/impala_num_nodes.xml | 17 +-
 docs/topics/impala_num_scanner_threads.xml | 5 +-
 docs/topics/impala_odbc.xml | 183 +-
 docs/topics/impala_offset.xml | 2 +
 docs/topics/impala_operators.xml | 738 +++-
 docs/topics/impala_order_by.xml | 18 +-
 docs/topics/impala_parquet.xml | 1136 ++++-
 .../topics/impala_parquet_compression_codec.xml | 1 +
 docs/topics/impala_parquet_file_size.xml | 4 +
 docs/topics/impala_partitioning.xml | 580 ++-
 docs/topics/impala_perf_hdfs_caching.xml | 593 ++-
 docs/topics/impala_perf_joins.xml | 488 ++-
 docs/topics/impala_perf_stats.xml | 1013 ++++-
 docs/topics/impala_performance.xml | 177 +-
 docs/topics/impala_porting.xml | 1 +
 docs/topics/impala_prereqs.xml | 342 +-
 docs/topics/impala_processes.xml | 124 +-
 docs/topics/impala_query_options.xml | 44 +-
 docs/topics/impala_query_timeout_s.xml | 11 +-
 docs/topics/impala_real.xml | 2 +-
 docs/topics/impala_refresh.xml | 160 +-
 docs/topics/impala_request_pool.xml | 22 +-
 .../impala_reservation_request_timeout.xml | 7 +-
 docs/topics/impala_resource_management.xml | 322 +-
 docs/topics/impala_revoke.xml | 12 +-
 docs/topics/impala_s3.xml | 777 +++-
 docs/topics/impala_scalability.xml | 816 +++-
 docs/topics/impala_schema_objects.xml | 2 +-
 docs/topics/impala_security.xml | 106 +-
 docs/topics/impala_select.xml | 5 +-
 docs/topics/impala_set.xml | 108 +-
 docs/topics/impala_shell_options.xml | 564 ++-
 docs/topics/impala_show.xml | 275 +-
 docs/topics/impala_smallint.xml | 6 +-
 docs/topics/impala_stddev.xml | 4 +-
 docs/topics/impala_string.xml | 41 +-
 docs/topics/impala_string_functions.xml | 211 +-
 docs/topics/impala_struct.xml | 369 +-
 docs/topics/impala_subqueries.xml | 16 +-
 docs/topics/impala_sum.xml | 19 +-
 docs/topics/impala_support_start_over.xml | 1 +
 docs/topics/impala_sync_ddl.xml | 9 +-
 docs/topics/impala_tables.xml | 2 +-
 docs/topics/impala_timeouts.xml | 167 +-
 docs/topics/impala_timestamp.xml | 8 +-
 docs/topics/impala_tinyint.xml | 10 +-
 docs/topics/impala_truncate_table.xml | 49 +-
 docs/topics/impala_tutorial.xml | 2485 ++++++++++-
 docs/topics/impala_txtfile.xml | 792 +++-
 docs/topics/impala_udf.xml | 306 +-
 docs/topics/impala_union.xml | 2 +
 docs/topics/impala_update.xml | 7 +-
 docs/topics/impala_upgrading.xml | 364 +-
 docs/topics/impala_use.xml | 4 +-
 docs/topics/impala_v_cpu_cores.xml | 8 +-
 docs/topics/impala_varchar.xml | 10 +-
 docs/topics/impala_variance.xml | 4 +-
 docs/topics/impala_views.xml | 7 +-
 docs/topics/impala_with.xml | 2 +
 docs/topics/rg_impala_vd.xml | 1165 -----
 docs/topics/sg_hive_sql.xml | 27 -
 docs/topics/sg_redaction.xml | 22 -
 162 files changed, 27657 insertions(+), 3013 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/cdh_ig_parquet.xml
----------------------------------------------------------------------
diff --git a/docs/topics/cdh_ig_parquet.xml b/docs/topics/cdh_ig_parquet.xml
deleted file mode 100644
index 2eba8b3..0000000
--- a/docs/topics/cdh_ig_parquet.xml
+++ /dev/null
@@ -1,20
+0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> -<concept xmlns:ditaarch="http://dita.oasis-open.org/architecture/2005/" id="parquet_format" xml:lang="en-US" ditaarch:DITAArchVersion="1.2" domains="(topic concept) (topic hi-d) (topic ut-d) (topic indexing-d) (topic hazard-d) (topic abbrev-d) (topic pr-d) (topic sw-d) (topic ui-d) "> - <title>Parquet Files</title> - - - <conbody> - - <p><xref href="https://parquet.apache.org/" format="html" scope="external" - >Apache Parquet</xref> is a <xref - href="http://en.wikipedia.org/wiki/Column-oriented_DBMS" format="html" - scope="external">columnar storage</xref> format available to any - component in the Hadoop ecosystem, regardless of the data processing - framework, data model, or programming language. The Parquet file format - incorporates several features that support data warehouse-style - operations:</p> - - </conbody> - </concept> - http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_abort_on_default_limit_exceeded.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_abort_on_default_limit_exceeded.xml b/docs/topics/impala_abort_on_default_limit_exceeded.xml index c58be63..5b159a7 100644 --- a/docs/topics/impala_abort_on_default_limit_exceeded.xml +++ b/docs/topics/impala_abort_on_default_limit_exceeded.xml @@ -3,10 +3,13 @@ <concept rev="obwl" id="abort_on_default_limit_exceeded"> <title>ABORT_ON_DEFAULT_LIMIT_EXCEEDED Query Option</title> + <titlealts audience="PDF"><navtitle>ABORT_ON_DEFAULT_LIMIT_EXCEEDED</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> </metadata> </prolog> 
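[Editor's note: the hunk above and the one that follows touch topic files for Impala query options, which are set per session through the SQL <codeph>SET</codeph> statement in <cmdname>impala-shell</cmdname>. A minimal sketch for orientation; the option name and <codeph>SET</codeph> syntax are as documented in these topics, but the session and table name shown are hypothetical:]

```sql
-- Hypothetical impala-shell session; sample_table is a made-up name.
SET ABORT_ON_ERROR=true;   -- stop the query at the first error encountered
SELECT count(*) FROM sample_table;
SET ABORT_ON_ERROR=false;  -- revert to the default (report errors but keep going)
```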
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_abort_on_error.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_abort_on_error.xml b/docs/topics/impala_abort_on_error.xml index 1926333..950f8a4 100644 --- a/docs/topics/impala_abort_on_error.xml +++ b/docs/topics/impala_abort_on_error.xml @@ -3,11 +3,15 @@ <concept id="abort_on_error"> <title>ABORT_ON_ERROR Query Option</title> + <titlealts audience="PDF"><navtitle>ABORT_ON_ERROR</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> <data name="Category" value="Impala Query Options"/> <data name="Category" value="Troubleshooting"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> </metadata> </prolog> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_admission.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_admission.xml b/docs/topics/impala_admission.xml index 0921546..1a73cff 100644 --- a/docs/topics/impala_admission.xml +++ b/docs/topics/impala_admission.xml @@ -3,7 +3,14 @@ <concept rev="1.3.0" id="admission_control"> <title>Admission Control and Query Queuing</title> - + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Admission Control"/> + <data name="Category" value="Resource Management"/> + </metadata> + </prolog> <conbody> @@ -18,7 +25,920 @@ incoming queries wait to begin execution. These queries are queued and are admitted (that is, begin executing) when the resources become available. 
</p> - + <p> + In addition to the threshold values for currently executing queries, + you can place a limit on the number of queries that are + queued (waiting) and a limit on the amount of time they might wait + before returning with an error. These queue settings let you ensure that queries do + not wait indefinitely, so that you can detect and correct <q>starvation</q> scenarios. + </p> + <p> + Enable this feature if your cluster is + underutilized at some times and overutilized at others. Overutilization is indicated by performance + bottlenecks and queries being cancelled due to out-of-memory conditions, when those same queries are + successful and perform well during times with less concurrent load. Admission control works as a safeguard to + avoid out-of-memory conditions during heavy concurrent usage. + </p> + + <note conref="../shared/impala_common.xml#common/impala_llama_obsolete"/> + + <p outputclass="toc inpage"/> + </conbody> + + <concept id="admission_intro"> + + <title>Overview of Impala Admission Control</title> + <prolog> + <metadata> + <data name="Category" value="Concepts"/> + </metadata> + </prolog> + + <conbody> + + <p> + On a busy CDH cluster, you might find there is an optimal number of Impala queries that run concurrently. + For example, when the I/O capacity is fully utilized by I/O-intensive queries, + you might not find any throughput benefit in running more concurrent queries. + By allowing some queries to run at full speed while others wait, rather than having + all queries contend for resources and run slowly, admission control can result in higher overall throughput. + </p> + + <p> + For another example, consider a memory-bound workload such as many large joins or aggregation queries. + Each such query could briefly use many gigabytes of memory to process intermediate results. 
+ Because Impala by default cancels queries that exceed the specified memory limit, + running multiple large-scale queries at once might require + re-running some queries that are cancelled. In this case, admission control improves the + reliability and stability of the overall workload by only allowing as many concurrent queries + as the overall memory of the cluster can accommodate. + </p> + + <p> + The admission control feature lets you set an upper limit on the number of concurrent Impala + queries and on the memory used by those queries. Any additional queries are queued until the earlier ones + finish, rather than being cancelled or running slowly and causing contention. As other queries finish, the + queued queries are allowed to proceed. + </p> + + <p rev="2.5.0"> + In CDH 5.7 / Impala 2.5 and higher, you can specify these limits and thresholds for each + pool rather than globally. That way, you can balance the resource usage and throughput + between steady well-defined workloads, rare resource-intensive queries, and ad hoc + exploratory queries. + </p> + + <p> + For details on the internal workings of admission control, see + <xref href="impala_admission.xml#admission_architecture"/>. + </p> + </conbody> + </concept> + + <concept id="admission_concurrency"> + <title>Concurrent Queries and Admission Control</title> + <conbody> + <p> + One way to limit resource usage through admission control is to set an upper limit + on the number of concurrent queries. This is the initial technique you might use + when you do not have extensive information about memory usage for your workload. + This setting can be specified separately for each dynamic resource pool. + </p> + <p> + You can combine this setting with the memory-based approach described in + <xref href="impala_admission.xml#admission_memory"/>. 
If either the maximum number of + or the expected memory usage of the concurrent queries is exceeded, subsequent queries + are queued until the concurrent workload falls below the threshold again. + </p> + <p> + See <xref href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr"/> for information about all these dynamic resource + pool settings, how to use them together, and how to divide different parts of your workload among + different pools. + </p> + </conbody> + </concept> + + <concept id="admission_memory"> + <title>Memory Limits and Admission Control</title> + <conbody> + <p> + Each dynamic resource pool can have an upper limit on the cluster-wide memory used by queries executing in that pool. + This is the technique to use once you have a stable workload with well-understood memory requirements. + </p> + <p> + Always specify the <uicontrol>Default Query Memory Limit</uicontrol> for the expected maximum amount of RAM + that a query might require on each host, which is equivalent to setting the <codeph>MEM_LIMIT</codeph> + query option for every query run in that pool. That value affects the execution of each query, preventing it + from overallocating memory on each host, and potentially activating the spill-to-disk mechanism or cancelling + the query when necessary. + </p> + <p> + Optionally, specify the <uicontrol>Max Memory</uicontrol> setting, a cluster-wide limit that determines + how many queries can be safely run concurrently, based on the upper memory limit per host multiplied by the + number of Impala nodes in the cluster. + </p> + <p conref="../shared/impala_common.xml#common/admission_control_mem_limit_interaction"/> + <note conref="../shared/impala_common.xml#common/max_memory_default_limit_caveat"/> + <p> + You can combine the memory-based settings with the upper limit on concurrent queries described in + <xref href="impala_admission.xml#admission_concurrency"/>. 
If either the maximum number of + or the expected memory usage of the concurrent queries is exceeded, subsequent queries + are queued until the concurrent workload falls below the threshold again. + </p> + <p> + See <xref href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr"/> for information about all these dynamic resource + pool settings, how to use them together, and how to divide different parts of your workload among + different pools. + </p> + </conbody> + </concept> + + <concept id="admission_yarn"> + + <title>How Impala Admission Control Relates to Other Resource Management Tools</title> + <prolog> + <metadata> + <data name="Category" value="Concepts"/> + </metadata> + </prolog> + + <conbody> + + <p> + The admission control feature is similar in some ways to the Cloudera Manager + static partitioning feature, as well as the YARN resource management framework. These features + can be used separately or together. This section describes some similarities and differences, to help you + decide which combination of resource management features to use for Impala. + </p> + + <p> + Admission control is a lightweight, decentralized system that is suitable for workloads consisting + primarily of Impala queries and other SQL statements. It sets <q>soft</q> limits that smooth out Impala + memory usage during times of heavy load, rather than taking an all-or-nothing approach that cancels jobs + that are too resource-intensive. + </p> + + <p> + Because the admission control system does not interact with other Hadoop workloads such as MapReduce jobs, you + might use YARN with static service pools on CDH 5 clusters where resources are shared between + Impala and other Hadoop components. This configuration is recommended when using Impala in a + <term>multitenant</term> cluster. Devote a percentage of cluster resources to Impala, and allocate another + percentage for MapReduce and other batch-style workloads. 
Let admission control handle the concurrency and + memory usage for the Impala work within the cluster, and let YARN manage the work for other components within the + cluster. In this scenario, Impala's resources are not managed by YARN. + </p> + + <p> + The Impala admission control feature uses the same configuration mechanism as the YARN resource manager to map users to + pools and authenticate them. + </p> + + <p rev="DOCS-648"> + Although the Impala admission control feature uses a <codeph>fair-scheduler.xml</codeph> configuration file + behind the scenes, this file does not depend on which scheduler is used for YARN. You still use this file, + and Cloudera Manager can generate it for you, even when YARN is using the capacity scheduler. + </p> + + </conbody> + </concept> + + <concept id="admission_architecture"> + + <title>How Impala Schedules and Enforces Limits on Concurrent Queries</title> + <prolog> + <metadata> + <data name="Category" value="Concepts"/> + <data name="Category" value="Scheduling"/> + </metadata> + </prolog> + + <conbody> + + <p> + The admission control system is decentralized, embedded in each Impala daemon and communicating through the + statestore mechanism. Although the limits you set for memory usage and number of concurrent queries apply + cluster-wide, each Impala daemon makes its own decisions about whether to allow each query to run + immediately or to queue it for a less-busy time. These decisions are fast, meaning the admission control + mechanism is low-overhead, but might be imprecise during times of heavy load across many coordinators. There could be times when + more queries are queued (in aggregate across the cluster) than the specified limit, or when the number of admitted queries + exceeds the expected number. 
Thus, you typically err on the + high side for the size of the queue, because there is not a big penalty for having a large number of queued + queries; and you typically err on the low side for configuring memory resources, to leave some headroom in case more + queries are admitted than expected, without running out of memory and being cancelled as a result. + </p> + +<!-- Commenting out as redundant. + <p> + The limit on the number of concurrent queries is a <q>soft</q> one. To achieve high throughput, Impala + makes quick decisions at the host level about which queued queries to dispatch. Therefore, Impala might + slightly exceed the limits from time to time. + </p> +--> + + <p> + To avoid a large backlog of queued requests, you can set an upper limit on the number of + queries that can be queued. When the number of queued queries exceeds this limit, further queries are + cancelled rather than being queued. You can also configure a timeout period per pool, after which queued queries are + cancelled, to avoid indefinite waits. If a cluster reaches a state where queries are cancelled due to + too many concurrent requests or long waits for query execution to begin, that is a signal for an + administrator to take action, either by provisioning more resources, by scheduling work on the cluster to + smooth out the load, or by doing <xref href="impala_performance.xml#performance">Impala performance + tuning</xref> to enable higher throughput. 
+ </p> + </conbody> + </concept> + + <concept id="admission_jdbc_odbc"> + + <title>How Admission Control Works with Impala Clients (JDBC, ODBC, HiveServer2)</title> + <prolog> + <metadata> + <data name="Category" value="JDBC"/> + <data name="Category" value="ODBC"/> + <data name="Category" value="HiveServer2"/> + <data name="Category" value="Concepts"/> + </metadata> + </prolog> + + <conbody> + + <p> + Most aspects of admission control work transparently with client interfaces such as JDBC and ODBC: + </p> + + <ul> + <li> + If a SQL statement is put into a queue rather than running immediately, the API call blocks until the + statement is dequeued and begins execution. At that point, the client program can request to fetch + results, which might also block until results become available. + </li> + + <li> + If a SQL statement is cancelled because it has been queued for too long or because it exceeded the memory + limit during execution, the error is returned to the client program with a descriptive error message. + </li> + + </ul> + + <p rev="CDH-27667"> + In Impala 2.0 and higher, you can submit + a SQL <codeph>SET</codeph> statement from the client application + to change the <codeph>REQUEST_POOL</codeph> query option. + This option lets you submit queries to different resource pools, + as described in <xref href="impala_request_pool.xml#request_pool"/>. +<!-- Commenting out as starting to be too old to mention. + Prior to Impala 2.0, that option was only settable + for a session through the <cmdname>impala-shell</cmdname> <codeph>SET</codeph> command, or cluster-wide through an + <cmdname>impalad</cmdname> startup option. +--> + </p> + + <p> + At any time, the set of queued queries could include queries submitted through multiple different Impala + daemon hosts. All the queries submitted through a particular host will be executed in order, so a + <codeph>CREATE TABLE</codeph> followed by an <codeph>INSERT</codeph> on the same table would succeed. 
+ Queries submitted through different hosts are not guaranteed to be executed in the order they were + received. Therefore, if you are using load-balancing or other round-robin scheduling where different + statements are submitted through different hosts, set up all table structures ahead of time so that the + statements controlled by the queuing system are primarily queries, where order is not significant. Or, if a + sequence of statements needs to happen in strict order (such as an <codeph>INSERT</codeph> followed by a + <codeph>SELECT</codeph>), submit all those statements through a single session, while connected to the same + Impala daemon host. + </p> + + <p> + Admission control has the following limitations or special behavior when used with JDBC or ODBC + applications: + </p> + + <ul> + <li> + The other resource-related query options, + <codeph>RESERVATION_REQUEST_TIMEOUT</codeph> and <codeph>V_CPU_CORES</codeph>, are no longer used. Those query options only + applied to using Impala with Llama, which is no longer supported. + </li> + </ul> + </conbody> + </concept> + + <concept id="admission_schema_config"> + <title>SQL and Schema Considerations for Admission Control</title> + <conbody> + <p> + When queries complete quickly and are tuned for optimal memory usage, there is less chance of + performance or capacity problems during times of heavy load. Before setting up admission control, + tune your Impala queries to ensure that the query plans are efficient and the memory estimates + are accurate. Understanding the nature of your workload, and which queries are the most + resource-intensive, helps you to plan how to divide the queries into different pools and + decide what limits to define for each pool. + </p> + <p> + For large tables, especially those involved in join queries, keep their statistics up to date + after loading substantial amounts of new data or adding new partitions. 
+ Use the <codeph>COMPUTE STATS</codeph> statement for unpartitioned tables, and + <codeph>COMPUTE INCREMENTAL STATS</codeph> for partitioned tables. + </p> + <p> + When you use dynamic resource pools with a <uicontrol>Max Memory</uicontrol> setting enabled, + you typically override the memory estimates that Impala makes based on the statistics from the + <codeph>COMPUTE STATS</codeph> statement. + You can set the <codeph>MEM_LIMIT</codeph> query option within a particular session to + set an upper memory limit for queries within that session, set a default <codeph>MEM_LIMIT</codeph> + for all queries processed by the <cmdname>impalad</cmdname> instance, or + set a default <codeph>MEM_LIMIT</codeph> for all queries assigned to a particular + dynamic resource pool. By designating a consistent memory limit for a set of similar queries + that use the same resource pool, you avoid unnecessary query queuing or out-of-memory conditions + that can arise during high-concurrency workloads when memory estimates for some queries are inaccurate. + </p> + <p> + Follow other steps from <xref href="impala_performance.xml#performance"/> to tune your queries. + </p> + </conbody> + </concept> + + + <concept id="admission_config"> + + <title>Configuring Admission Control</title> + <prolog> + <metadata> + <data name="Category" value="Configuring"/> + </metadata> + </prolog> + + <conbody> + + <p> + The configuration options for admission control range from the simple (a single resource pool with a single + set of options) to the complex (multiple resource pools with different options, each pool handling queries + for a different set of users and groups). Cloudera recommends configuring the settings through the Cloudera Manager user + interface. + <!-- + , or on a system without Cloudera Manager by editing configuration files or through startup + options to the <cmdname>impalad</cmdname> daemon. 
+ --> + </p> + +<!-- To do: reconcile the similar notes in impala_admission.xml and admin_impala_admission_control.xml + and make into a conref in both places. --> + <note type="important"> + Although the following options are still present in the Cloudera Manager interface under the + <uicontrol>Admission Control</uicontrol> configuration settings dialog, + Cloudera recommends you not use them in CDH 5.7 / Impala 2.5 and higher. + These settings only apply if you enable admission control but leave dynamic resource pools disabled. + In CDH 5.7 / Impala 2.5 and higher, prefer to set up dynamic resource pools and + customize the settings for each pool, as described in + <xref href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr/section_p15_mhn_2v"/> + and + <xref href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr/section_gph_tnk_lm"/>. + </note> + + <section id="admission_flags"> + + <title>Impala Service Flags for Admission Control (Advanced)</title> + + <p> + The following Impala configuration options let you adjust the settings of the admission control feature. When supplying the + options on the <cmdname>impalad</cmdname> command line, prepend the option name with <codeph>--</codeph>. + </p> + + <dl id="admission_control_option_list"> + <dlentry id="queue_wait_timeout_ms"> + <dt> + <codeph>queue_wait_timeout_ms</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--queue_wait_timeout_ms</indexterm> + <b>Purpose:</b> Maximum amount of time (in milliseconds) that a + request waits to be admitted before timing out. + <p> + <b>Type:</b> <codeph>int64</codeph> + </p> + <p> + <b>Default:</b> <codeph>60000</codeph> + </p> + </dd> + </dlentry> + <dlentry id="default_pool_max_requests"> + <dt> + <codeph>default_pool_max_requests</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--default_pool_max_requests</indexterm> + <b>Purpose:</b> Maximum number of concurrent outstanding requests + allowed to run before incoming requests are queued. 
Because this + limit applies cluster-wide, but each Impala node makes independent + decisions to run queries immediately or queue them, it is a soft + limit; the overall number of concurrent queries might be slightly + higher during times of heavy load. A negative value indicates no + limit. Ignored if <codeph>fair_scheduler_config_path</codeph> and + <codeph>llama_site_path</codeph> are set. <p> + <b>Type:</b> + <codeph>int64</codeph> + </p> + <p> + <b>Default:</b> + <ph rev="2.5.0">-1, meaning unlimited (prior to CDH 5.7 / Impala 2.5, the default was 200)</ph> + </p> + </dd> + </dlentry> + <dlentry id="default_pool_max_queued"> + <dt> + <codeph>default_pool_max_queued</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--default_pool_max_queued</indexterm> + <b>Purpose:</b> Maximum number of requests allowed to be queued + before rejecting requests. Because this limit applies + cluster-wide, but each Impala node makes independent decisions to + run queries immediately or queue them, it is a soft limit; the + overall number of queued queries might be slightly higher during + times of heavy load. A negative value or 0 indicates requests are + always rejected once the maximum concurrent requests are + executing. Ignored if <codeph>fair_scheduler_config_path</codeph> + and <codeph>llama_site_path</codeph> are set. <p> + <b>Type:</b> + <codeph>int64</codeph> + </p> + <p> + <b>Default:</b> + <ph rev="2.5.0">unlimited</ph> + </p> + </dd> + </dlentry> + <dlentry id="default_pool_mem_limit"> + <dt> + <codeph>default_pool_mem_limit</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--default_pool_mem_limit</indexterm> + <b>Purpose:</b> Maximum amount of memory (across the entire + cluster) that all outstanding requests in this pool can use before + new requests to this pool are queued. 
Specified in bytes, + megabytes, or gigabytes by a number followed by the suffix + <codeph>b</codeph> (optional), <codeph>m</codeph>, or + <codeph>g</codeph>, either uppercase or lowercase. You can + specify floating-point values for megabytes and gigabytes, to + represent fractional numbers such as <codeph>1.5</codeph>. You can + also specify it as a percentage of the physical memory by + specifying the suffix <codeph>%</codeph>. 0 or no setting + indicates no limit. Defaults to bytes if no unit is given. Because + this limit applies cluster-wide, but each Impala node makes + independent decisions to run queries immediately or queue them, it + is a soft limit; the overall memory used by concurrent queries + might be slightly higher during times of heavy load. Ignored if + <codeph>fair_scheduler_config_path</codeph> and + <codeph>llama_site_path</codeph> are set. <note + conref="../shared/impala_common.xml#common/admission_compute_stats" /> + <p conref="../shared/impala_common.xml#common/type_string" /> + <p> + <b>Default:</b> + <codeph>""</codeph> (empty string, meaning unlimited) </p> + </dd> + </dlentry> + <!-- Possibly from here on down, command-line controls not applicable to CM. --> + <dlentry id="disable_admission_control"> + <dt> + <codeph>disable_admission_control</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--disable_admission_control</indexterm> + <b>Purpose:</b> Turns off the admission control feature entirely, + regardless of other configuration option settings. + <p> + <b>Type:</b> Boolean </p> + <p> + <b>Default:</b> + <codeph>false</codeph> + </p> + </dd> + </dlentry> + <dlentry id="disable_pool_max_requests"> + <dt> + <codeph>disable_pool_max_requests</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--disable_pool_max_requests</indexterm> + <b>Purpose:</b> Disables all per-pool limits on the maximum number + of running requests. 
<p> + <b>Type:</b> Boolean </p> + <p> + <b>Default:</b> + <codeph>false</codeph> + </p> + </dd> + </dlentry> + <dlentry id="disable_pool_mem_limits"> + <dt> + <codeph>disable_pool_mem_limits</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--disable_pool_mem_limits</indexterm> + <b>Purpose:</b> Disables all per-pool mem limits. <p> + <b>Type:</b> Boolean </p> + <p> + <b>Default:</b> + <codeph>false</codeph> + </p> + </dd> + </dlentry> + <dlentry id="fair_scheduler_allocation_path"> + <dt> + <codeph>fair_scheduler_allocation_path</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--fair_scheduler_allocation_path</indexterm> + <b>Purpose:</b> Path to the fair scheduler allocation file + (<codeph>fair-scheduler.xml</codeph>). <p + conref="../shared/impala_common.xml#common/type_string" /> + <p> + <b>Default:</b> + <codeph>""</codeph> (empty string) </p> + <p> + <b>Usage notes:</b> Admission control only uses a small subset + of the settings that can go in this file, as described below. + For details about all the Fair Scheduler configuration settings, + see the <xref + href="http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html#Configuration" + scope="external" format="html">Apache wiki</xref>. </p> + </dd> + </dlentry> + <dlentry id="llama_site_path"> + <dt> + <codeph>llama_site_path</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--llama_site_path</indexterm> + <b>Purpose:</b> Path to the configuration file used by admission control + (<codeph>llama-site.xml</codeph>). If set, + <codeph>fair_scheduler_allocation_path</codeph> must also be set. + <p conref="../shared/impala_common.xml#common/type_string" /> + <p> + <b>Default:</b> <codeph>""</codeph> (empty string) </p> + <p> + <b>Usage notes:</b> Admission control only uses a few + of the settings that can go in this file, as described below. 
+ </p> + </dd> + </dlentry> + </dl> + </section> </conbody> + + <concept id="admission_config_cm"> + +<!-- TK: Maybe all this stuff overlaps with admin_impala_admission_control and can be delegated there. --> + + <title>Configuring Admission Control Using Cloudera Manager</title> + <prolog> + <metadata> + <data name="Category" value="Cloudera Manager"/> + </metadata> + </prolog> + + <conbody> + + <p> + In Cloudera Manager, you can configure pools to manage queued Impala queries, along with options such as the + limit on the number of concurrent queries and how to handle queries that exceed that limit. For details, see + <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_managing_resources.html" scope="external" format="html">Managing Resources with Cloudera Manager</xref>. + </p> + + <p audience="Cloudera"><!-- Hiding link because that subtopic is now hidden. --> + See <xref href="#admission_examples"/> for a sample setup for admission control under + Cloudera Manager. + </p> + </conbody> + </concept> + + <concept id="admission_config_noncm"> + + <title>Configuring Admission Control Using the Command Line</title> + + <conbody> + + <p> + If you do not use Cloudera Manager, you configure admission control through a combination of startup options + for the Impala daemon and, optionally, by editing or manually constructing the configuration files + <filepath>fair-scheduler.xml</filepath> and <filepath>llama-site.xml</filepath>. + </p> + + <p> + For a straightforward configuration using a single resource pool named <codeph>default</codeph>, you can + specify configuration options on the command line and skip the <filepath>fair-scheduler.xml</filepath> + and <filepath>llama-site.xml</filepath> configuration files. + </p> + + <p> + For an advanced configuration with multiple resource pools using different settings, set up the + <filepath>fair-scheduler.xml</filepath> and <filepath>llama-site.xml</filepath> configuration files + manually. 
Provide the paths to each one using the <cmdname>impalad</cmdname> command-line options, + <codeph>--fair_scheduler_allocation_path</codeph> and <codeph>--llama_site_path</codeph> respectively. + </p> + + <p> + The Impala admission control feature only uses the Fair Scheduler configuration settings to determine how + to map users and groups to different resource pools. For example, you might set up different resource + pools with separate memory limits, and maximum number of concurrent and queued queries, for different + categories of users within your organization. For details about all the Fair Scheduler configuration + settings, see the + <xref href="http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html#Configuration" scope="external" format="html">Apache + wiki</xref>. + </p> + + <p> + The Impala admission control feature only uses a small subset of possible settings from the + <filepath>llama-site.xml</filepath> configuration file: + </p> + +<codeblock>llama.am.throttling.maximum.placed.reservations.<varname>queue_name</varname> +llama.am.throttling.maximum.queued.reservations.<varname>queue_name</varname> +<ph rev="2.5.0 IMPALA-2538">impala.admission-control.pool-default-query-options.<varname>queue_name</varname> +impala.admission-control.pool-queue-timeout-ms.<varname>queue_name</varname></ph> +</codeblock> + + <p rev="2.5.0 IMPALA-2538"> + The <codeph>impala.admission-control.pool-queue-timeout-ms</codeph> + setting specifies the timeout value for this pool, in milliseconds. + The <codeph>impala.admission-control.pool-default-query-options</codeph> + setting designates the default query options for all queries that run + in this pool. Its argument value is a comma-delimited string of + 'key=value' pairs, for example, <codeph>'key1=val1,key2=val2'</codeph>. + This is where you might set a default memory limit + for all queries in the pool, using an argument such as <codeph>MEM_LIMIT=5G</codeph>. 
+ </p> + + <p rev="2.5.0 IMPALA-2538"> + The <codeph>impala.admission-control.*</codeph> configuration settings are available in + CDH 5.7 / Impala 2.5 and higher. + </p> + + <p audience="Cloudera"><!-- Hiding link because that subtopic is now hidden. --> + See <xref href="#admission_examples/section_etq_qgb_rq"/> for sample configuration files + for admission control using multiple resource pools, without Cloudera Manager. + </p> + </conbody> + </concept> + + <concept id="admission_examples"> + <!-- Pruning the CM examples and screenshots because in CDH 5.7 / Impala 2.5, the defaults match up much better with our recommendations. --> + + <title>Examples of Admission Control Configurations</title> + + <conbody> + + <section id="section_fqn_qgb_rq"> + + <title>Example Admission Control Configurations Using Cloudera Manager</title> + + <p> + For full instructions about configuring dynamic resource pools through Cloudera Manager, see + <xref audience="integrated" href="cm_mc_resource_pools.xml#xd_583c10bfdbd326ba--43d5fd93-1410993f8c2--7ff2"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_resource_pools.html" scope="external" format="html"/>. + </p> + + </section> + + <section id="section_etq_qgb_rq"> + + <title>Example Admission Control Configurations Using Configuration Files</title> + + <p> + For clusters not managed by Cloudera Manager, here are sample <filepath>fair-scheduler.xml</filepath> + and <filepath>llama-site.xml</filepath> files that define resource pools <codeph>root.default</codeph>, + <codeph>root.development</codeph>, and <codeph>root.production</codeph>. + These sample files are stripped down: in a real deployment they + might contain other settings for use with various aspects of the YARN component. The + settings shown here are the significant ones for the Impala admission control feature. 
+ </p> + + <p> + <b>fair-scheduler.xml:</b> + </p> + + <p> + Although Impala does not use the <codeph>vcores</codeph> value, you must still specify it to satisfy + YARN requirements for the file contents. + </p> + + <p> + Each <codeph><aclSubmitApps></codeph> tag (other than the one for <codeph>root</codeph>) contains + a comma-separated list of users, then a space, then a comma-separated list of groups; these are the + users and groups allowed to submit Impala statements to the corresponding resource pool. + </p> + + <p> + If you leave the <codeph><aclSubmitApps></codeph> element empty for a pool, nobody can submit + directly to that pool; child pools can specify their own <codeph><aclSubmitApps></codeph> values + to authorize users and groups to submit to those pools. + </p> + +<codeblock><![CDATA[<allocations> + <queue name="root"> + <aclSubmitApps> </aclSubmitApps> + <queue name="default"> + <maxResources>50000 mb, 0 vcores</maxResources> + <aclSubmitApps>*</aclSubmitApps> + </queue> + <queue name="development"> + <maxResources>200000 mb, 0 vcores</maxResources> + <aclSubmitApps>user1,user2 dev,ops,admin</aclSubmitApps> + </queue> + <queue name="production"> + <maxResources>1000000 mb, 0 vcores</maxResources> + <aclSubmitApps> ops,admin</aclSubmitApps> + </queue> + </queue> + <queuePlacementPolicy> + <rule name="specified" create="false"/> + <rule name="default" /> + </queuePlacementPolicy> +</allocations> +]]> +</codeblock> + + <p> + <b>llama-site.xml:</b> + </p> + +<codeblock rev="2.5.0 IMPALA-2538"><![CDATA[ +<?xml version="1.0" encoding="UTF-8"?> +<configuration> + <property> + <name>llama.am.throttling.maximum.placed.reservations.root.default</name> + <value>10</value> + </property> + <property> + <name>llama.am.throttling.maximum.queued.reservations.root.default</name> + <value>50</value> + </property> + <property> + <name>impala.admission-control.pool-default-query-options.root.default</name> + 
<value>mem_limit=128m,query_timeout_s=20,max_io_buffers=10</value> + </property> + <property> + <name>impala.admission-control.pool-queue-timeout-ms.root.default</name> + <value>30000</value> + </property> + <property> + <name>llama.am.throttling.maximum.placed.reservations.root.development</name> + <value>50</value> + </property> + <property> + <name>llama.am.throttling.maximum.queued.reservations.root.development</name> + <value>100</value> + </property> + <property> + <name>impala.admission-control.pool-default-query-options.root.development</name> + <value>mem_limit=256m,query_timeout_s=30,max_io_buffers=10</value> + </property> + <property> + <name>impala.admission-control.pool-queue-timeout-ms.root.development</name> + <value>15000</value> + </property> + <property> + <name>llama.am.throttling.maximum.placed.reservations.root.production</name> + <value>100</value> + </property> + <property> + <name>llama.am.throttling.maximum.queued.reservations.root.production</name> + <value>200</value> + </property> +<!-- + Default query options for the 'root.production' pool. + THIS IS A NEW PARAMETER in CDH 5.7 / Impala 2.5. + Note that the MEM_LIMIT query option still shows up in here even though it is a + separate box in the UI. We do that because it is the most important query option + that people will need (everything else is somewhat advanced). + + MEM_LIMIT takes a per-node memory limit which is specified using one of the following: + - '<int>[bB]?' -> bytes (default if no unit given) + - '<float>[mM(bB)]' -> megabytes + - '<float>[gG(bB)]' -> in gigabytes + E.g. 'MEM_LIMIT=12345' (no unit) means 12345 bytes, and you can append m or g + to specify megabytes or gigabytes, though that is not required. +--> + <property> + <name>impala.admission-control.pool-default-query-options.root.production</name> + <value>mem_limit=386m,query_timeout_s=30,max_io_buffers=10</value> + </property> +<!-- + Default queue timeout (ms) for the pool 'root.production'. 
+ If this isn't set, the process-wide flag is used. + THIS IS A NEW PARAMETER in CDH 5.7 / Impala 2.5. +--> + <property> + <name>impala.admission-control.pool-queue-timeout-ms.root.production</name> + <value>30000</value> + </property> +</configuration> +]]> +</codeblock> + </section> + </conbody> + </concept> </concept> +<!-- End Config --> + + <concept id="admission_guidelines"> + + <title>Guidelines for Using Admission Control</title> + <prolog> + <metadata> + <data name="Category" value="Planning"/> + <data name="Category" value="Guidelines"/> + <data name="Category" value="Best Practices"/> + </metadata> + </prolog> + + <conbody> + + <p> + To see how admission control works for particular queries, examine the profile output for the query. This + information is available through the <codeph>PROFILE</codeph> statement in <cmdname>impala-shell</cmdname> + immediately after running a query in the shell, on the <uicontrol>queries</uicontrol> page of the Impala + debug web UI, or in the Impala log file (basic information at log level 1, more detailed information at log + level 2). The profile output contains details about the admission decision, such as whether the query was + queued or not and which resource pool it was assigned to. It also includes the estimated and actual memory + usage for the query, so you can fine-tune the configuration for the memory limits of the resource pools. + </p> + + <p> + Where practical, use Cloudera Manager to configure the admission control parameters. The Cloudera Manager + GUI is much simpler than editing the configuration files directly. + </p> + + <p> + Remember that the limits imposed by admission control are <q>soft</q> limits. + The decentralized nature of this mechanism means that each Impala node makes its own decisions about whether + to allow queries to run immediately or to queue them. These decisions rely on information passed back and forth + between nodes by the statestore service. 
If a sudden surge in requests causes more queries than anticipated to run + concurrently, then throughput could decrease due to queries spilling to disk or contending for resources; + or queries could be cancelled if they exceed the <codeph>MEM_LIMIT</codeph> setting while running. + </p> + +<!-- + <p> + If you have trouble getting a query to run because its estimated memory usage is too high, you can override + the estimate by setting the <codeph>MEM_LIMIT</codeph> query option in <cmdname>impala-shell</cmdname>, + then issuing the query through the shell in the same session. The <codeph>MEM_LIMIT</codeph> value is + treated as the estimated amount of memory, overriding the estimate that Impala would generate based on + table and column statistics. This value is used only for making admission control decisions, and is not + pre-allocated by the query. + </p> +--> + + <p> + In <cmdname>impala-shell</cmdname>, you can also specify which resource pool to direct queries to by + setting the <codeph>REQUEST_POOL</codeph> query option. + </p> + + <p> + The statements affected by the admission control feature are primarily queries, but also include statements + that write data such as <codeph>INSERT</codeph> and <codeph>CREATE TABLE AS SELECT</codeph>. Most write + operations in Impala are not resource-intensive, but inserting into a Parquet table can require substantial + memory due to buffering intermediate data before writing out each Parquet data block. See + <xref href="impala_parquet.xml#parquet_etl"/> for instructions about inserting data efficiently into + Parquet tables. + </p> + + <p> + Although admission control does not scrutinize memory usage for other kinds of DDL statements, if a query + is queued due to a limit on concurrent queries or memory usage, subsequent statements in the same session + are also queued so that they are processed in the correct order: + </p> + +<codeblock>-- This query could be queued to avoid out-of-memory at times of heavy load. 
+select * from huge_table join enormous_table using (id); +-- If so, this subsequent statement in the same session is also queued +-- until the previous statement completes. +drop table huge_table; +</codeblock> + + <p> + If you set up different resource pools for different users and groups, consider reusing any classifications + you developed for use with Sentry security. See <xref href="impala_authorization.xml#authorization"/> for details. + </p> + + <p> + For details about all the Fair Scheduler configuration settings, see + <xref href="https://archive.cloudera.com/cdh5/cdh/5/hadoop/hadoop-yarn/hadoop-yarn-site/FairScheduler.html#Configuration" scope="external" format="html">Fair + Scheduler Configuration</xref>, in particular the tags such as <codeph><queue></codeph> and + <codeph><aclSubmitApps></codeph> to map users and groups to particular resource pools (queues). + </p> + +<!-- Wait a sec. We say admission control doesn't use RESERVATION_REQUEST_TIMEOUT at all. + What's the real story here? Matt did refer to some timeout option that was + available through the shell but not the DB-centric APIs. +<p> + Because you cannot override query options such as + <codeph>RESERVATION_REQUEST_TIMEOUT</codeph> + in a JDBC or ODBC application, consider configuring timeout periods + on the application side to cancel queries that take + too long due to being queued during times of high load. 
+</p> +--> + </conbody> + </concept> +</concept> +<!-- Admission control --> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_aggregate_functions.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_aggregate_functions.xml b/docs/topics/impala_aggregate_functions.xml index 5095266..66d617a 100644 --- a/docs/topics/impala_aggregate_functions.xml +++ b/docs/topics/impala_aggregate_functions.xml @@ -3,7 +3,7 @@ <concept id="aggregate_functions"> <title>Impala Aggregate Functions</title> - <titlealts><navtitle>Aggregate Functions</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>Aggregate Functions</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_aliases.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_aliases.xml b/docs/topics/impala_aliases.xml index 66a16fe..fd686cd 100644 --- a/docs/topics/impala_aliases.xml +++ b/docs/topics/impala_aliases.xml @@ -3,7 +3,7 @@ <concept id="aliases"> <title>Overview of Impala Aliases</title> - <titlealts><navtitle>Aliases</navtitle></titlealts> + <titlealts audience="PDF"><navtitle>Aliases</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> @@ -32,18 +32,34 @@ <codeph>AS</codeph> keyword is optional; you can also specify the alias immediately after the original name. </p> +<codeblock>-- Make the column headers of the result set easier to understand. +SELECT c1 AS name, c2 AS address, c3 AS phone FROM table_with_terse_columns; +SELECT SUM(ss_xyz_dollars_net) AS total_sales FROM table_with_cryptic_columns; +-- The alias can be a quoted string for extra readability. +SELECT c1 AS "Employee ID", c2 AS "Date of hire" FROM t1; +-- The AS keyword is optional. 
+SELECT c1 "Employee ID", c2 "Date of hire" FROM t1; + +-- The table aliases assigned in the FROM clause can be used both earlier +-- in the query (the SELECT list) and later (the WHERE clause). +SELECT one.name, two.address, three.phone + FROM census one, building_directory two, phonebook three +WHERE one.id = two.id and two.id = three.id; + +-- The aliases c1 and c2 let the query handle columns with the same names from 2 joined tables. +-- The aliases t1 and t2 let the query abbreviate references to long or cryptically named tables. +SELECT t1.column_n AS c1, t2.column_n AS c2 FROM long_name_table AS t1, very_long_name_table2 AS t2 + WHERE c1 = c2; +SELECT t1.column_n c1, t2.column_n c2 FROM table1 t1, table2 t2 + WHERE c1 = c2; +</codeblock> + <p> To use an alias name that matches one of the Impala reserved keywords (listed in <xref href="impala_reserved_words.xml#reserved_words"/>), surround the identifier with either single or double quotation marks, or <codeph>``</codeph> characters (backticks). 
</p> -<codeblock>select c1 as name, c2 as address, c3 as phone from table_with_terse_columns; -select sum(ss_xyz_dollars_net) as total_sales from table_with_cryptic_columns; -select one.name, two.address, three.phone from - census one, building_directory two, phonebook three - where one.id = two.id and two.id = three.id;</codeblock> - <p> <ph conref="../shared/impala_common.xml#common/aliases_vs_identifiers"/> </p> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_allow_unsupported_formats.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_allow_unsupported_formats.xml b/docs/topics/impala_allow_unsupported_formats.xml index 824daa4..fbf758e 100644 --- a/docs/topics/impala_allow_unsupported_formats.xml +++ b/docs/topics/impala_allow_unsupported_formats.xml @@ -3,10 +3,12 @@ <concept id="allow_unsupported_formats"> <title>ALLOW_UNSUPPORTED_FORMATS Query Option</title> + <titlealts audience="PDF"><navtitle>ALLOW_UNSUPPORTED_FORMATS</navtitle></titlealts> <prolog> <metadata> <data name="Category" value="Impala"/> <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Deprecated Features"/> </metadata> </prolog>

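The patch above documents that `impala.admission-control.pool-default-query-options` takes a comma-delimited string of `'key=value'` pairs, such as `mem_limit=128m,query_timeout_s=20,max_io_buffers=10`. As an illustrative sketch only (this is not Impala's actual parser, which is internal C++ code; the function name here is hypothetical), that string format can be parsed like this:

```python
def parse_pool_default_query_options(value):
    """Parse a comma-delimited 'key=value' string, e.g.
    'mem_limit=128m,query_timeout_s=20', into a dict of option names
    to raw string values. Hypothetical helper for illustration only.
    """
    options = {}
    if not value:
        return options
    for pair in value.split(","):
        # partition() splits on the first '=', so values may themselves
        # contain '=' without breaking the parse.
        key, sep, val = pair.strip().partition("=")
        if not sep:
            raise ValueError("expected key=value, got: %r" % pair)
        options[key.strip().lower()] = val.strip()
    return options

print(parse_pool_default_query_options(
    "mem_limit=128m,query_timeout_s=20,max_io_buffers=10"))
```

Note that the values are left as strings; interpreting suffixes such as `m` or `g` on `MEM_LIMIT` (described in the `default_pool_mem_limit` flag documentation above) would be a separate step.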