http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala.xml b/docs/topics/impala.xml new file mode 100644 index 0000000..8dc7b2a --- /dev/null +++ b/docs/topics/impala.xml @@ -0,0 +1,77 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="about_impala"> + + <title>Apache Impala (incubating) - Interactive SQL</title> + <titlealts audience="PDF"><navtitle>Impala Guide</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Components"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="SQL"/> + </metadata> + </prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/impala_mission_statement"/> + + <p conref="../shared/impala_common.xml#common/impala_hive_compatibility"/> + + <p conref="../shared/impala_common.xml#common/impala_advantages"/> + + <p outputclass="toc"/> + + <p audience="integrated"> + <b>Related information throughout the CDH 5 library:</b> + </p> + + <p audience="integrated"> + In CDH 5, the Impala documentation for Release Notes, Installation, Upgrading, and Security has been + integrated alongside the corresponding information for other Hadoop components: + </p> + +<!-- Same list is in impala.xml and Impala FAQs. Conref in both places. 
--> + + <ul> + <li> + <xref href="impala_new_features.xml#new_features">New features</xref> + </li> + + <li> + <xref href="impala_known_issues.xml#known_issues">Known and fixed issues</xref> + </li> + + <li> + <xref href="impala_incompatible_changes.xml#incompatible_changes">Incompatible changes</xref> + </li> + + <li> + <xref href="impala_install.xml#install">Installing Impala</xref> + </li> + + <li> + <xref href="impala_upgrading.xml#upgrading">Upgrading Impala</xref> + </li> + + <li> + <xref href="impala_config.xml#config">Configuring Impala</xref> + </li> + + <li> + <xref href="impala_processes.xml#processes">Starting Impala</xref> + </li> + + <li> + <xref href="impala_security.xml#security">Security for Impala</xref> + </li> + + <li> + <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html" scope="external" format="html">CDH + Version and Packaging Information</xref> + </li> + </ul> + </conbody> +</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_abort_on_default_limit_exceeded.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_abort_on_default_limit_exceeded.xml b/docs/topics/impala_abort_on_default_limit_exceeded.xml new file mode 100644 index 0000000..5b159a7 --- /dev/null +++ b/docs/topics/impala_abort_on_default_limit_exceeded.xml @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="obwl" id="abort_on_default_limit_exceeded"> + + <title>ABORT_ON_DEFAULT_LIMIT_EXCEEDED Query Option</title> + <titlealts audience="PDF"><navtitle>ABORT_ON_DEFAULT_LIMIT_EXCEEDED</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/obwl_query_options"/> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_abort_on_error.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_abort_on_error.xml b/docs/topics/impala_abort_on_error.xml new file mode 100644 index 0000000..950f8a4 --- /dev/null +++ b/docs/topics/impala_abort_on_error.xml @@ -0,0 +1,44 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="abort_on_error"> + + <title>ABORT_ON_ERROR Query Option</title> + <titlealts audience="PDF"><navtitle>ABORT_ON_ERROR</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data 
name="Category" value="Impala Query Options"/> + <data name="Category" value="Troubleshooting"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">ABORT_ON_ERROR query option</indexterm> + When this option is enabled, Impala cancels a query immediately when any of the nodes encounters an error, + rather than continuing and possibly returning incomplete results. This option is disabled by default, to help + gather maximum diagnostic information when an error occurs, for example, whether the same problem occurred on + all nodes or only a single node. Currently, the errors that Impala can skip over involve data corruption, + such as a column that contains a string value when expected to contain an integer value. + </p> + + <p> + To control how much logging Impala does for non-fatal errors when <codeph>ABORT_ON_ERROR</codeph> is turned + off, use the <codeph>MAX_ERRORS</codeph> option. 
+ </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + <p> + <xref href="impala_max_errors.xml#max_errors"/>, + <xref href="impala_logging.xml#logging"/> + </p> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_admin.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_admin.xml b/docs/topics/impala_admin.xml new file mode 100644 index 0000000..3da7d5f --- /dev/null +++ b/docs/topics/impala_admin.xml @@ -0,0 +1,60 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="admin"> + + <title>Impala Administration</title> + <titlealts audience="PDF"><navtitle>Administration</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Administrators"/> + <!-- Although there is a reasonable amount of info on the page, it could be better to use wiki-style embedding instead of linking hither and thither. --> + <data name="Category" value="Stub Pages"/> + </metadata> + </prolog> + + <conbody> + + <p> + As an administrator, you monitor Impala's use of resources and take action when necessary to keep Impala + running smoothly and avoid conflicts with other Hadoop components running on the same cluster. When you + detect that an issue has happened or could happen in the future, you reconfigure Impala or other components + such as HDFS or even the hardware of the cluster itself to resolve or avoid problems. + </p> + + <p outputclass="toc"/> + + <p> + <b>Related tasks:</b> + </p> + + <p> + As an administrator, you can expect to perform installation, upgrade, and configuration tasks for Impala on + all machines in a cluster. 
See <xref href="impala_install.xml#install"/>, + <xref href="impala_upgrading.xml#upgrading"/>, and <xref href="impala_config.xml#config"/> for details. + </p> + + <p> + For security tasks typically performed by administrators, see <xref href="impala_security.xml#security"/>. + </p> + + <p> + Administrators also decide how to allocate cluster resources so that all Hadoop components can run smoothly + together. For Impala, this task primarily involves: + <ul> + <li> + Deciding how many Impala queries can run concurrently and with how much memory, through the admission + control feature. See <xref href="impala_admission.xml#admission_control"/> for details. + </li> + + <li> + Dividing cluster resources such as memory between Impala and other components, using YARN for overall + resource management, and Llama to mediate resource requests from Impala to YARN. See + <xref href="impala_resource_management.xml#resource_management"/> for details. + </li> + </ul> + </p> + +<!-- <p conref="../shared/impala_common.xml#common/impala_mr"/> --> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_admission.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_admission.xml b/docs/topics/impala_admission.xml new file mode 100644 index 0000000..ad53823 --- /dev/null +++ b/docs/topics/impala_admission.xml @@ -0,0 +1,947 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="1.3.0" id="admission_control"> + + <title>Admission Control and Query Queuing</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Admission Control"/> + <data name="Category" value="Resource Management"/> + </metadata> + </prolog> + + <conbody> + + <p id="admission_control_intro"> + Admission control is an Impala feature that 
imposes limits on concurrent SQL queries, to avoid resource usage + spikes and out-of-memory conditions on busy CDH clusters. + It is a form of <q>throttling</q>. + New queries are accepted and executed until + certain conditions are met, such as too many queries or too much + total memory used across the cluster. + When one of these thresholds is reached, + incoming queries wait to begin execution. These queries are + queued and are admitted (that is, begin executing) when the resources become available. + </p> + <p> + In addition to the threshold values for currently executing queries, + you can place limits on the maximum number of queries that are + queued (waiting) and a limit on the amount of time they might wait + before returning with an error. These queue settings let you ensure that queries do + not wait indefinitely, so that you can detect and correct <q>starvation</q> scenarios. + </p> + <p> + Enable this feature if your cluster is + underutilized at some times and overutilized at others. Overutilization is indicated by performance + bottlenecks and queries being cancelled due to out-of-memory conditions, when those same queries are + successful and perform well during times with less concurrent load. Admission control works as a safeguard to + avoid out-of-memory conditions during heavy concurrent usage. + </p> + + <note conref="../shared/impala_common.xml#common/impala_llama_obsolete"/> + + <p outputclass="toc inpage"/> + </conbody> + + <concept id="admission_intro"> + + <title>Overview of Impala Admission Control</title> + <prolog> + <metadata> + <data name="Category" value="Concepts"/> + </metadata> + </prolog> + + <conbody> + + <p> + On a busy CDH cluster, you might find there is an optimal number of Impala queries that run concurrently. + For example, when the I/O capacity is fully utilized by I/O-intensive queries, + you might not find any throughput benefit in running more concurrent queries. 
+ By allowing some queries to run at full speed while others wait, rather than having + all queries contend for resources and run slowly, admission control can result in higher overall throughput. + </p> + + <p> + For another example, consider a memory-bound workload such as many large joins or aggregation queries. + Each such query could briefly use many gigabytes of memory to process intermediate results. + Because Impala by default cancels queries that exceed the specified memory limit, + running multiple large-scale queries at once might require + re-running some queries that are cancelled. In this case, admission control improves the + reliability and stability of the overall workload by only allowing as many concurrent queries + as the overall memory of the cluster can accommodate. + </p> + + <p> + The admission control feature lets you set an upper limit on the number of concurrent Impala + queries and on the memory used by those queries. Any additional queries are queued until the earlier ones + finish, rather than being cancelled or running slowly and causing contention. As other queries finish, the + queued queries are allowed to proceed. + </p> + + <p rev="2.5.0"> + In <keyword keyref="impala25_full"/> and higher, you can specify these limits and thresholds for each + pool rather than globally. That way, you can balance the resource usage and throughput + between steady well-defined workloads, rare resource-intensive queries, and ad hoc + exploratory queries. + </p> + + <p> + For details on the internal workings of admission control, see + <xref href="impala_admission.xml#admission_architecture"/>. + </p> + </conbody> + </concept> + + <concept id="admission_concurrency"> + <title>Concurrent Queries and Admission Control</title> + <conbody> + <p> + One way to limit resource usage through admission control is to set an upper limit + on the number of concurrent queries. 
This is the initial technique you might use + when you do not have extensive information about memory usage for your workload. + This setting can be specified separately for each dynamic resource pool. + </p> + <p> + You can combine this setting with the memory-based approach described in + <xref href="impala_admission.xml#admission_memory"/>. If either the maximum number of + or the expected memory usage of the concurrent queries is exceeded, subsequent queries + are queued until the concurrent workload falls below the threshold again. + </p> + <p> + See + <xref audience="integrated" href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_resource_pools.html" scope="external" format="html"/> + for information about all these dynamic resource + pool settings, how to use them together, and how to divide different parts of your workload among + different pools. + </p> + </conbody> + </concept> + + <concept id="admission_memory"> + <title>Memory Limits and Admission Control</title> + <conbody> + <p> + Each dynamic resource pool can have an upper limit on the cluster-wide memory used by queries executing in that pool. + This is the technique to use once you have a stable workload with well-understood memory requirements. + </p> + <p> + Always specify the <uicontrol>Default Query Memory Limit</uicontrol> for the expected maximum amount of RAM + that a query might require on each host, which is equivalent to setting the <codeph>MEM_LIMIT</codeph> + query option for every query run in that pool. That value affects the execution of each query, preventing it + from overallocating memory on each host, and potentially activating the spill-to-disk mechanism or cancelling + the query when necessary. 
+ </p> + <p> + Optionally, specify the <uicontrol>Max Memory</uicontrol> setting, a cluster-wide limit that determines + how many queries can be safely run concurrently, based on the upper memory limit per host multiplied by the + number of Impala nodes in the cluster. + </p> + <p conref="../shared/impala_common.xml#common/admission_control_mem_limit_interaction"/> + <note conref="../shared/impala_common.xml#common/max_memory_default_limit_caveat"/> + <p> + You can combine the memory-based settings with the upper limit on concurrent queries described in + <xref href="impala_admission.xml#admission_concurrency"/>. If either the maximum number of + or the expected memory usage of the concurrent queries is exceeded, subsequent queries + are queued until the concurrent workload falls below the threshold again. + </p> + <p> + See + <xref audience="integrated" href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_resource_pools.html" scope="external" format="html"/> + for information about all these dynamic resource + pool settings, how to use them together, and how to divide different parts of your workload among + different pools. + </p> + </conbody> + </concept> + + <concept id="admission_yarn"> + + <title>How Impala Admission Control Relates to Other Resource Management Tools</title> + <prolog> + <metadata> + <data name="Category" value="Concepts"/> + </metadata> + </prolog> + + <conbody> + + <p> + The admission control feature is similar in some ways to the Cloudera Manager + static partitioning feature, as well as the YARN resource management framework. These features + can be used separately or together. This section describes some similarities and differences, to help you + decide which combination of resource management features to use for Impala. 
+ </p> + + <p> + Admission control is a lightweight, decentralized system that is suitable for workloads consisting + primarily of Impala queries and other SQL statements. It sets <q>soft</q> limits that smooth out Impala + memory usage during times of heavy load, rather than taking an all-or-nothing approach that cancels jobs + that are too resource-intensive. + </p> + + <p> + Because the admission control system does not interact with other Hadoop workloads such as MapReduce jobs, you + might use YARN with static service pools on CDH 5 clusters where resources are shared between + Impala and other Hadoop components. This configuration is recommended when using Impala in a + <term>multitenant</term> cluster. Devote a percentage of cluster resources to Impala, and allocate another + percentage for MapReduce and other batch-style workloads. Let admission control handle the concurrency and + memory usage for the Impala work within the cluster, and let YARN manage the work for other components within the + cluster. In this scenario, Impala's resources are not managed by YARN. + </p> + + <p> + The Impala admission control feature uses the same configuration mechanism as the YARN resource manager to map users to + pools and authenticate them. + </p> + + <p rev="DOCS-648"> + Although the Impala admission control feature uses a <codeph>fair-scheduler.xml</codeph> configuration file + behind the scenes, this file does not depend on which scheduler is used for YARN. You still use this file, + and Cloudera Manager can generate it for you, even when YARN is using the capacity scheduler. 
+ </p> + + </conbody> + </concept> + + <concept id="admission_architecture"> + + <title>How Impala Schedules and Enforces Limits on Concurrent Queries</title> + <prolog> + <metadata> + <data name="Category" value="Concepts"/> + <data name="Category" value="Scheduling"/> + </metadata> + </prolog> + + <conbody> + + <p> + The admission control system is decentralized, embedded in each Impala daemon and communicating through the + statestore mechanism. Although the limits you set for memory usage and number of concurrent queries apply + cluster-wide, each Impala daemon makes its own decisions about whether to allow each query to run + immediately or to queue it for a less-busy time. These decisions are fast, meaning the admission control + mechanism is low-overhead, but might be imprecise during times of heavy load across many coordinators. There could be times when + more queries were queued (in aggregate across the cluster) than the specified limit, or when the number of admitted queries + exceeds the expected number. Thus, you typically err on the + high side for the size of the queue, because there is not a big penalty for having a large number of queued + queries; and you typically err on the low side for configuring memory resources, to leave some headroom in case more + queries are admitted than expected, without running out of memory and being cancelled as a result. + </p> + +<!-- Commenting out as redundant. + <p> + The limit on the number of concurrent queries is a <q>soft</q> one, To achieve high throughput, Impala + makes quick decisions at the host level about which queued queries to dispatch. Therefore, Impala might + slightly exceed the limits from time to time. + </p> +--> + + <p> + To avoid a large backlog of queued requests, you can set an upper limit on the size of the queue for + queries that are queued. When the number of queued queries exceeds this limit, further queries are + cancelled rather than being queued. 
You can also configure a timeout period per pool, after which queued queries are + cancelled, to avoid indefinite waits. If a cluster reaches this state where queries are cancelled due to + too many concurrent requests or long waits for query execution to begin, that is a signal for an + administrator to take action, either by provisioning more resources, scheduling work on the cluster to + smooth out the load, or by doing <xref href="impala_performance.xml#performance">Impala performance + tuning</xref> to enable higher throughput. + </p> + </conbody> + </concept> + + <concept id="admission_jdbc_odbc"> + + <title>How Admission Control works with Impala Clients (JDBC, ODBC, HiveServer2)</title> + <prolog> + <metadata> + <data name="Category" value="JDBC"/> + <data name="Category" value="ODBC"/> + <data name="Category" value="HiveServer2"/> + <data name="Category" value="Concepts"/> + </metadata> + </prolog> + + <conbody> + + <p> + Most aspects of admission control work transparently with client interfaces such as JDBC and ODBC: + </p> + + <ul> + <li> + If a SQL statement is put into a queue rather than running immediately, the API call blocks until the + statement is dequeued and begins execution. At that point, the client program can request to fetch + results, which might also block until results become available. + </li> + + <li> + If a SQL statement is cancelled because it has been queued for too long or because it exceeded the memory + limit during execution, the error is returned to the client program with a descriptive error message. + </li> + + </ul> + + <p rev="CDH-27667"> + In Impala 2.0 and higher, you can submit + a SQL <codeph>SET</codeph> statement from the client application + to change the <codeph>REQUEST_POOL</codeph> query option. + This option lets you submit queries to different resource pools, + as described in <xref href="impala_request_pool.xml#request_pool"/>. +<!-- Commenting out as starting to be too old to mention. 
+ Prior to Impala 2.0, that option was only settable + for a session through the <cmdname>impala-shell</cmdname> <codeph>SET</codeph> command, or cluster-wide through an + <cmdname>impalad</cmdname> startup option. +--> + </p> + + <p> + At any time, the set of queued queries could include queries submitted through multiple different Impala + daemon hosts. All the queries submitted through a particular host will be executed in order, so a + <codeph>CREATE TABLE</codeph> followed by an <codeph>INSERT</codeph> on the same table would succeed. + Queries submitted through different hosts are not guaranteed to be executed in the order they were + received. Therefore, if you are using load-balancing or other round-robin scheduling where different + statements are submitted through different hosts, set up all table structures ahead of time so that the + statements controlled by the queuing system are primarily queries, where order is not significant. Or, if a + sequence of statements needs to happen in strict order (such as an <codeph>INSERT</codeph> followed by a + <codeph>SELECT</codeph>), submit all those statements through a single session, while connected to the same + Impala daemon host. + </p> + + <p> + Admission control has the following limitations or special behavior when used with JDBC or ODBC + applications: + </p> + + <ul> + <li> + The other resource-related query options, + <codeph>RESERVATION_REQUEST_TIMEOUT</codeph> and <codeph>V_CPU_CORES</codeph>, are no longer used. Those query options only + applied to using Impala with Llama, which is no longer supported. + </li> + </ul> + </conbody> + </concept> + + <concept id="admission_schema_config"> + <title>SQL and Schema Considerations for Admission Control</title> + <conbody> + <p> + When queries complete quickly and are tuned for optimal memory usage, there is less chance of + performance or capacity problems during times of heavy load. 
Before setting up admission control, + tune your Impala queries to ensure that the query plans are efficient and the memory estimates + are accurate. Understanding the nature of your workload, and which queries are the most + resource-intensive, helps you to plan how to divide the queries into different pools and + decide what limits to define for each pool. + </p> + <p> + For large tables, especially those involved in join queries, keep their statistics up to date + after loading substantial amounts of new data or adding new partitions. + Use the <codeph>COMPUTE STATS</codeph> statement for unpartitioned tables, and + <codeph>COMPUTE INCREMENTAL STATS</codeph> for partitioned tables. + </p> + <p> + When you use dynamic resource pools with a <uicontrol>Max Memory</uicontrol> setting enabled, + you typically override the memory estimates that Impala makes based on the statistics from the + <codeph>COMPUTE STATS</codeph> statement. + You either set the <codeph>MEM_LIMIT</codeph> query option within a particular session to + set an upper memory limit for queries within that session, or a default <codeph>MEM_LIMIT</codeph> + setting for all queries processed by the <cmdname>impalad</cmdname> instance, or + a default <codeph>MEM_LIMIT</codeph> setting for all queries assigned to a particular + dynamic resource pool. By designating a consistent memory limit for a set of similar queries + that use the same resource pool, you avoid unnecessary query queuing or out-of-memory conditions + that can arise during high-concurrency workloads when memory estimates for some queries are inaccurate. + </p> + <p> + Follow other steps from <xref href="impala_performance.xml#performance"/> to tune your queries. 
+ </p> + </conbody> + </concept> + + + <concept id="admission_config"> + + <title>Configuring Admission Control</title> + <prolog> + <metadata> + <data name="Category" value="Configuring"/> + </metadata> + </prolog> + + <conbody> + + <p> + The configuration options for admission control range from the simple (a single resource pool with a single + set of options) to the complex (multiple resource pools with different options, each pool handling queries + for a different set of users and groups). <ph rev="upstream">Cloudera</ph> recommends configuring the settings through the Cloudera Manager user + interface. + <!-- + , or on a system without Cloudera Manager by editing configuration files or through startup + options to the <cmdname>impalad</cmdname> daemon. + --> + </p> + +<!-- To do: reconcile the similar notes in impala_admission.xml and admin_impala_admission_control.xml + and make into a conref in both places. --> + <note type="important"> + Although the following options are still present in the Cloudera Manager interface under the + <uicontrol>Admission Control</uicontrol> configuration settings dialog, + <ph rev="upstream">Cloudera</ph> recommends you not use them in <keyword keyref="impala25_full"/> and higher. + These settings only apply if you enable admission control but leave dynamic resource pools disabled. + In <keyword keyref="impala25_full"/> and higher, prefer to set up dynamic resource pools and + customize the settings for each pool, as described in + <ph audience="integrated"><xref href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr/section_p15_mhn_2v"/> and <xref href="cm_mc_resource_pools.xml#concept_xkk_l1d_wr/section_gph_tnk_lm"/></ph> + <xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_resource_pools.html" scope="external" format="html"/>. 
+ </note> + + <section id="admission_flags"> + + <title>Impala Service Flags for Admission Control (Advanced)</title> + + <p> + The following Impala configuration options let you adjust the settings of the admission control feature. When supplying the + options on the <cmdname>impalad</cmdname> command line, prepend the option name with <codeph>--</codeph>. + </p> + + <dl id="admission_control_option_list"> + <dlentry id="queue_wait_timeout_ms"> + <dt> + <codeph>queue_wait_timeout_ms</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--queue_wait_timeout_ms</indexterm> + <b>Purpose:</b> Maximum amount of time (in milliseconds) that a + request waits to be admitted before timing out. + <p> + <b>Type:</b> <codeph>int64</codeph> + </p> + <p> + <b>Default:</b> <codeph>60000</codeph> + </p> + </dd> + </dlentry> + <dlentry id="default_pool_max_requests"> + <dt> + <codeph>default_pool_max_requests</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--default_pool_max_requests</indexterm> + <b>Purpose:</b> Maximum number of concurrent outstanding requests + allowed to run before incoming requests are queued. Because this + limit applies cluster-wide, but each Impala node makes independent + decisions to run queries immediately or queue them, it is a soft + limit; the overall number of concurrent queries might be slightly + higher during times of heavy load. A negative value indicates no + limit. Ignored if <codeph>fair_scheduler_config_path</codeph> and + <codeph>llama_site_path</codeph> are set. 
<p> + <b>Type:</b> + <codeph>int64</codeph> + </p> + <p> + <b>Default:</b> + <ph rev="2.5.0">-1, meaning unlimited (prior to <keyword keyref="impala25_full"/> the default was 200)</ph> + </p> + </dd> + </dlentry> + <dlentry id="default_pool_max_queued"> + <dt> + <codeph>default_pool_max_queued</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--default_pool_max_queued</indexterm> + <b>Purpose:</b> Maximum number of requests allowed to be queued + before rejecting requests. Because this limit applies + cluster-wide, but each Impala node makes independent decisions to + run queries immediately or queue them, it is a soft limit; the + overall number of queued queries might be slightly higher during + times of heavy load. A negative value or 0 indicates requests are + always rejected once the maximum concurrent requests are + executing. Ignored if <codeph>fair_scheduler_config_path</codeph> + and <codeph>llama_site_path</codeph> are set. <p> + <b>Type:</b> + <codeph>int64</codeph> + </p> + <p> + <b>Default:</b> + <ph rev="2.5.0">unlimited</ph> + </p> + </dd> + </dlentry> + <dlentry id="default_pool_mem_limit"> + <dt> + <codeph>default_pool_mem_limit</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--default_pool_mem_limit</indexterm> + <b>Purpose:</b> Maximum amount of memory (across the entire + cluster) that all outstanding requests in this pool can use before + new requests to this pool are queued. Specified in bytes, + megabytes, or gigabytes by a number followed by the suffix + <codeph>b</codeph> (optional), <codeph>m</codeph>, or + <codeph>g</codeph>, either uppercase or lowercase. You can + specify floating-point values for megabytes and gigabytes, to + represent fractional numbers such as <codeph>1.5</codeph>. You can + also specify it as a percentage of the physical memory by + specifying the suffix <codeph>%</codeph>. 0 or no setting + indicates no limit. Defaults to bytes if no unit is given. 
Because + this limit applies cluster-wide, but each Impala node makes + independent decisions to run queries immediately or queue them, it + is a soft limit; the overall memory used by concurrent queries + might be slightly higher during times of heavy load. Ignored if + <codeph>fair_scheduler_config_path</codeph> and + <codeph>llama_site_path</codeph> are set. <note + conref="../shared/impala_common.xml#common/admission_compute_stats" /> + <p conref="../shared/impala_common.xml#common/type_string" /> + <p> + <b>Default:</b> + <codeph>""</codeph> (empty string, meaning unlimited) </p> + </dd> + </dlentry> + <!-- Possibly from here on down, command-line controls not applicable to CM. --> + <dlentry id="disable_admission_control"> + <dt> + <codeph>disable_admission_control</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--disable_admission_control</indexterm> + <b>Purpose:</b> Turns off the admission control feature entirely, + regardless of other configuration option settings. + <p> + <b>Type:</b> Boolean </p> + <p> + <b>Default:</b> + <codeph>false</codeph> + </p> + </dd> + </dlentry> + <dlentry id="disable_pool_max_requests"> + <dt> + <codeph>disable_pool_max_requests</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--disable_pool_max_requests</indexterm> + <b>Purpose:</b> Disables all per-pool limits on the maximum number + of running requests. <p> + <b>Type:</b> Boolean </p> + <p> + <b>Default:</b> + <codeph>false</codeph> + </p> + </dd> + </dlentry> + <dlentry id="disable_pool_mem_limits"> + <dt> + <codeph>disable_pool_mem_limits</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--disable_pool_mem_limits</indexterm> + <b>Purpose:</b> Disables all per-pool mem limits. 
<p> + <b>Type:</b> Boolean </p> + <p> + <b>Default:</b> + <codeph>false</codeph> + </p> + </dd> + </dlentry> + <dlentry id="fair_scheduler_allocation_path"> + <dt> + <codeph>fair_scheduler_allocation_path</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--fair_scheduler_allocation_path</indexterm> + <b>Purpose:</b> Path to the fair scheduler allocation file + (<codeph>fair-scheduler.xml</codeph>). <p + conref="../shared/impala_common.xml#common/type_string" /> + <p> + <b>Default:</b> + <codeph>""</codeph> (empty string) </p> + <p> + <b>Usage notes:</b> Admission control only uses a small subset + of the settings that can go in this file, as described below. + For details about all the Fair Scheduler configuration settings, + see the <xref + href="http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html#Configuration" + scope="external" format="html">Apache wiki</xref>. </p> + </dd> + </dlentry> + <dlentry id="llama_site_path"> + <dt> + <codeph>llama_site_path</codeph> + </dt> + <dd> + <indexterm audience="Cloudera">--llama_site_path</indexterm> + <b>Purpose:</b> Path to the configuration file used by admission control + (<codeph>llama-site.xml</codeph>). If set, + <codeph>fair_scheduler_allocation_path</codeph> must also be set. + <p conref="../shared/impala_common.xml#common/type_string" /> + <p> + <b>Default:</b> <codeph>""</codeph> (empty string) </p> + <p> + <b>Usage notes:</b> Admission control only uses a few + of the settings that can go in this file, as described below. + </p> + </dd> + </dlentry> + </dl> + </section> + </conbody> + + <concept id="admission_config_cm"> + +<!-- TK: Maybe all this stuff overlaps with admin_impala_admission_control and can be delegated there. 
--> + + <title>Configuring Admission Control Using Cloudera Manager</title> + <prolog> + <metadata> + <data name="Category" value="Cloudera Manager"/> + </metadata> + </prolog> + + <conbody> + + <p> + In Cloudera Manager, you can configure pools to manage queued Impala queries, and the options for the + limit on number of concurrent queries and how to handle queries that exceed the limit. For details, see + <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_managing_resources.html" scope="external" format="html">Managing Resources with Cloudera Manager</xref>. + </p> + + <p audience="Cloudera"><!-- Hiding link because that subtopic is now hidden. --> + See <xref href="#admission_examples"/> for a sample setup for admission control under + Cloudera Manager. + </p> + </conbody> + </concept> + + <concept id="admission_config_noncm"> + + <title>Configuring Admission Control Using the Command Line</title> + + <conbody> + + <p> + If you do not use Cloudera Manager, you use a combination of startup options for the Impala daemon, and + optionally editing or manually constructing the configuration files + <filepath>fair-scheduler.xml</filepath> and <filepath>llama-site.xml</filepath>. + </p> + + <p> + For a straightforward configuration using a single resource pool named <codeph>default</codeph>, you can + specify configuration options on the command line and skip the <filepath>fair-scheduler.xml</filepath> + and <filepath>llama-site.xml</filepath> configuration files. + </p> + + <p> + For an advanced configuration with multiple resource pools using different settings, set up the + <filepath>fair-scheduler.xml</filepath> and <filepath>llama-site.xml</filepath> configuration files + manually. Provide the paths to each one using the <cmdname>impalad</cmdname> command-line options, + <codeph>--fair_scheduler_allocation_path</codeph> and <codeph>--llama_site_path</codeph> respectively. 
+ </p>
+
+ <p>
+ The Impala admission control feature only uses the Fair Scheduler configuration settings to determine how
+ to map users and groups to different resource pools. For example, you might set up different resource
+ pools with separate memory limits, and maximum number of concurrent and queued queries, for different
+ categories of users within your organization. For details about all the Fair Scheduler configuration
+ settings, see the
+ <xref href="http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html#Configuration" scope="external" format="html">Apache
+ wiki</xref>.
+ </p>
+
+ <p>
+ The Impala admission control feature only uses a small subset of possible settings from the
+ <filepath>llama-site.xml</filepath> configuration file:
+ </p>
+
+<codeblock>llama.am.throttling.maximum.placed.reservations.<varname>queue_name</varname>
+llama.am.throttling.maximum.queued.reservations.<varname>queue_name</varname>
+<ph rev="2.5.0 IMPALA-2538">impala.admission-control.pool-default-query-options.<varname>queue_name</varname>
+impala.admission-control.pool-queue-timeout-ms.<varname>queue_name</varname></ph>
+</codeblock>
+
+ <p rev="2.5.0 IMPALA-2538">
+ The <codeph>impala.admission-control.pool-queue-timeout-ms</codeph>
+ setting specifies the timeout value for this pool, in milliseconds.
+ The <codeph>impala.admission-control.pool-default-query-options</codeph>
+ setting designates the default query options for all queries that run
+ in this pool. Its argument value is a comma-delimited string of
+ 'key=value' pairs, for example, <codeph>'key1=val1,key2=val2'</codeph>.
+ For example, this is where you might set a default memory limit
+ for all queries in the pool, using an argument such as <codeph>MEM_LIMIT=5G</codeph>.
+ </p>
+
+ <p rev="2.5.0 IMPALA-2538">
+ The <codeph>impala.admission-control.*</codeph> configuration settings are available in
+ <keyword keyref="impala25_full"/> and higher. 
+ </p> + + <p audience="Cloudera"><!-- Hiding link because that subtopic is now hidden. --> + See <xref href="#admission_examples/section_etq_qgb_rq"/> for sample configuration files + for admission control using multiple resource pools, without Cloudera Manager. + </p> + </conbody> + </concept> + + <concept id="admission_examples"> + <!-- Pruning the CM examples and screenshots because in Impala 2.5 the defaults match up much better with our recommendations. --> + + <title>Examples of Admission Control Configurations</title> + + <conbody> + + <section id="section_fqn_qgb_rq"> + + <title>Example Admission Control Configurations Using Cloudera Manager</title> + + <p> + For full instructions about configuring dynamic resource pools through Cloudera Manager, see + <xref audience="integrated" href="cm_mc_resource_pools.xml#xd_583c10bfdbd326ba--43d5fd93-1410993f8c2--7ff2"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_mc_resource_pools.html" scope="external" format="html"/>. + </p> + + </section> + + <section id="section_etq_qgb_rq"> + + <title>Example Admission Control Configurations Using Configuration Files</title> + + <p> + For clusters not managed by Cloudera Manager, here are sample <filepath>fair-scheduler.xml</filepath> + and <filepath>llama-site.xml</filepath> files that define resource pools <codeph>root.default</codeph>, + <codeph>root.development</codeph>, and <codeph>root.production</codeph>. + These sample files are stripped down: in a real deployment they + might contain other settings for use with various aspects of the YARN component. The + settings shown here are the significant ones for the Impala admission control feature. + </p> + + <p> + <b>fair-scheduler.xml:</b> + </p> + + <p> + Although Impala does not use the <codeph>vcores</codeph> value, you must still specify it to satisfy + YARN requirements for the file contents. 
+ </p> + + <p> + Each <codeph><aclSubmitApps></codeph> tag (other than the one for <codeph>root</codeph>) contains + a comma-separated list of users, then a space, then a comma-separated list of groups; these are the + users and groups allowed to submit Impala statements to the corresponding resource pool. + </p> + + <p> + If you leave the <codeph><aclSubmitApps></codeph> element empty for a pool, nobody can submit + directly to that pool; child pools can specify their own <codeph><aclSubmitApps></codeph> values + to authorize users and groups to submit to those pools. + </p> + +<codeblock><![CDATA[<allocations> + <queue name="root"> + <aclSubmitApps> </aclSubmitApps> + <queue name="default"> + <maxResources>50000 mb, 0 vcores</maxResources> + <aclSubmitApps>*</aclSubmitApps> + </queue> + <queue name="development"> + <maxResources>200000 mb, 0 vcores</maxResources> + <aclSubmitApps>user1,user2 dev,ops,admin</aclSubmitApps> + </queue> + <queue name="production"> + <maxResources>1000000 mb, 0 vcores</maxResources> + <aclSubmitApps> ops,admin</aclSubmitApps> + </queue> + </queue> + <queuePlacementPolicy> + <rule name="specified" create="false"/> + <rule name="default" /> + </queuePlacementPolicy> +</allocations> +]]> +</codeblock> + + <p> + <b>llama-site.xml:</b> + </p> + +<codeblock rev="2.5.0 IMPALA-2538"><![CDATA[ +<?xml version="1.0" encoding="UTF-8"?> +<configuration> + <property> + <name>llama.am.throttling.maximum.placed.reservations.root.default</name> + <value>10</value> + </property> + <property> + <name>llama.am.throttling.maximum.queued.reservations.root.default</name> + <value>50</value> + </property> + <property> + <name>impala.admission-control.pool-default-query-options.root.default</name> + <value>mem_limit=128m,query_timeout_s=20,max_io_buffers=10</value> + </property> + <property> + <name>impala.admission-control.pool-queue-timeout-ms.root.default</name> + <value>30000</value> + </property> + <property> + 
<name>llama.am.throttling.maximum.placed.reservations.root.development</name>
+ <value>50</value>
+ </property>
+ <property>
+ <name>llama.am.throttling.maximum.queued.reservations.root.development</name>
+ <value>100</value>
+ </property>
+ <property>
+ <name>impala.admission-control.pool-default-query-options.root.development</name>
+ <value>mem_limit=256m,query_timeout_s=30,max_io_buffers=10</value>
+ </property>
+ <property>
+ <name>impala.admission-control.pool-queue-timeout-ms.root.development</name>
+ <value>15000</value>
+ </property>
+ <property>
+ <name>llama.am.throttling.maximum.placed.reservations.root.production</name>
+ <value>100</value>
+ </property>
+ <property>
+ <name>llama.am.throttling.maximum.queued.reservations.root.production</name>
+ <value>200</value>
+ </property>
+<!--
+ Default query options for the 'root.production' pool.
+ THIS IS A NEW PARAMETER in CDH 5.7 / Impala 2.5.
+ Note that the MEM_LIMIT query option still shows up in here even though it is a
+ separate box in the UI. We do that because it is the most important query option
+ that people will need (everything else is somewhat advanced).
+
+ MEM_LIMIT takes a per-node memory limit which is specified using one of the following:
+ - '<int>[bB]?' -> bytes (default if no unit given)
+ - '<float>[mM(bB)]' -> megabytes
+ - '<float>[gG(bB)]' -> in gigabytes
+ E.g. 'MEM_LIMIT=12345' (no unit) means 12345 bytes, and you can append m or g
+ to specify megabytes or gigabytes, though that is not required.
+-->
+ <property>
+ <name>impala.admission-control.pool-default-query-options.root.production</name>
+ <value>mem_limit=386m,query_timeout_s=30,max_io_buffers=10</value>
+ </property>
+<!--
+ Default queue timeout (ms) for the pool 'root.production'.
+ If this isn't set, the process-wide flag is used.
+ THIS IS A NEW PARAMETER in CDH 5.7 / Impala 2.5. 
+--> + <property> + <name>impala.admission-control.pool-queue-timeout-ms.root.production</name> + <value>30000</value> + </property> +</configuration> +]]> +</codeblock> + </section> + </conbody> + </concept> + </concept> + +<!-- End Config --> + + <concept id="admission_guidelines"> + + <title>Guidelines for Using Admission Control</title> + <prolog> + <metadata> + <data name="Category" value="Planning"/> + <data name="Category" value="Guidelines"/> + <data name="Category" value="Best Practices"/> + </metadata> + </prolog> + + <conbody> + + <p> + To see how admission control works for particular queries, examine the profile output for the query. This + information is available through the <codeph>PROFILE</codeph> statement in <cmdname>impala-shell</cmdname> + immediately after running a query in the shell, on the <uicontrol>queries</uicontrol> page of the Impala + debug web UI, or in the Impala log file (basic information at log level 1, more detailed information at log + level 2). The profile output contains details about the admission decision, such as whether the query was + queued or not and which resource pool it was assigned to. It also includes the estimated and actual memory + usage for the query, so you can fine-tune the configuration for the memory limits of the resource pools. + </p> + + <p> + Where practical, use Cloudera Manager to configure the admission control parameters. The Cloudera Manager + GUI is much simpler than editing the configuration files directly. + </p> + + <p> + Remember that the limits imposed by admission control are <q>soft</q> limits. + The decentralized nature of this mechanism means that each Impala node makes its own decisions about whether + to allow queries to run immediately or to queue them. These decisions rely on information passed back and forth + between nodes by the statestore service. 
If a sudden surge in requests causes more queries than anticipated to run + concurrently, then throughput could decrease due to queries spilling to disk or contending for resources; + or queries could be cancelled if they exceed the <codeph>MEM_LIMIT</codeph> setting while running. + </p> + +<!-- + <p> + If you have trouble getting a query to run because its estimated memory usage is too high, you can override + the estimate by setting the <codeph>MEM_LIMIT</codeph> query option in <cmdname>impala-shell</cmdname>, + then issuing the query through the shell in the same session. The <codeph>MEM_LIMIT</codeph> value is + treated as the estimated amount of memory, overriding the estimate that Impala would generate based on + table and column statistics. This value is used only for making admission control decisions, and is not + pre-allocated by the query. + </p> +--> + + <p> + In <cmdname>impala-shell</cmdname>, you can also specify which resource pool to direct queries to by + setting the <codeph>REQUEST_POOL</codeph> query option. + </p> + + <p> + The statements affected by the admission control feature are primarily queries, but also include statements + that write data such as <codeph>INSERT</codeph> and <codeph>CREATE TABLE AS SELECT</codeph>. Most write + operations in Impala are not resource-intensive, but inserting into a Parquet table can require substantial + memory due to buffering intermediate data before writing out each Parquet data block. See + <xref href="impala_parquet.xml#parquet_etl"/> for instructions about inserting data efficiently into + Parquet tables. + </p> + + <p> + Although admission control does not scrutinize memory usage for other kinds of DDL statements, if a query + is queued due to a limit on concurrent queries or memory usage, subsequent statements in the same session + are also queued so that they are processed in the correct order: + </p> + +<codeblock>-- This query could be queued to avoid out-of-memory at times of heavy load. 
+select * from huge_table join enormous_table using (id); +-- If so, this subsequent statement in the same session is also queued +-- until the previous statement completes. +drop table huge_table; +</codeblock> + + <p> + If you set up different resource pools for different users and groups, consider reusing any classifications + you developed for use with Sentry security. See <xref href="impala_authorization.xml#authorization"/> for details. + </p> + + <p> + For details about all the Fair Scheduler configuration settings, see + <xref href="https://archive.cloudera.com/cdh5/cdh/5/hadoop/hadoop-yarn/hadoop-yarn-site/FairScheduler.html#Configuration" scope="external" format="html">Fair + Scheduler Configuration</xref>, in particular the tags such as <codeph><queue></codeph> and + <codeph><aclSubmitApps></codeph> to map users and groups to particular resource pools (queues). + </p> + +<!-- Wait a sec. We say admission control doesn't use RESERVATION_REQUEST_TIMEOUT at all. + What's the real story here? Matt did refer to some timeout option that was + available through the shell but not the DB-centric APIs. +<p> + Because you cannot override query options such as + <codeph>RESERVATION_REQUEST_TIMEOUT</codeph> + in a JDBC or ODBC application, consider configuring timeout periods + on the application side to cancel queries that take + too long due to being queued during times of high load. 
+</p> +--> + </conbody> + </concept> +</concept> +<!-- Admission control --> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_aggregate_functions.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_aggregate_functions.xml b/docs/topics/impala_aggregate_functions.xml new file mode 100644 index 0000000..66d617a --- /dev/null +++ b/docs/topics/impala_aggregate_functions.xml @@ -0,0 +1,33 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="aggregate_functions"> + + <title>Impala Aggregate Functions</title> + <titlealts audience="PDF"><navtitle>Aggregate Functions</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Functions"/> + <data name="Category" value="Aggregate Functions"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Querying"/> + </metadata> + </prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/aggr1"/> + +<codeblock conref="../shared/impala_common.xml#common/aggr2"/> + + <p conref="../shared/impala_common.xml#common/aggr3"/> + + <p> + <indexterm audience="Cloudera">aggregate functions</indexterm> + </p> + + <p outputclass="toc"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_aliases.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_aliases.xml b/docs/topics/impala_aliases.xml new file mode 100644 index 0000000..fd686cd --- /dev/null +++ b/docs/topics/impala_aliases.xml @@ -0,0 +1,87 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="aliases"> + + <title>Overview of Impala 
Aliases</title> + <titlealts audience="PDF"><navtitle>Aliases</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + When you write the names of tables, columns, or column expressions in a query, you can assign an alias at the + same time. Then you can specify the alias rather than the original name when making other references to the + table or column in the same statement. You typically specify aliases that are shorter, easier to remember, or + both than the original names. The aliases are printed in the query header, making them useful for + self-documenting output. + </p> + + <p> + To set up an alias, add the <codeph>AS <varname>alias</varname></codeph> clause immediately after any table, + column, or expression name in the <codeph>SELECT</codeph> list or <codeph>FROM</codeph> list of a query. The + <codeph>AS</codeph> keyword is optional; you can also specify the alias immediately after the original name. + </p> + +<codeblock>-- Make the column headers of the result set easier to understand. +SELECT c1 AS name, c2 AS address, c3 AS phone FROM table_with_terse_columns; +SELECT SUM(ss_xyz_dollars_net) AS total_sales FROM table_with_cryptic_columns; +-- The alias can be a quoted string for extra readability. +SELECT c1 AS "Employee ID", c2 AS "Date of hire" FROM t1; +-- The AS keyword is optional. +SELECT c1 "Employee ID", c2 "Date of hire" FROM t1; + +-- The table aliases assigned in the FROM clause can be used both earlier +-- in the query (the SELECT list) and later (the WHERE clause). 
+SELECT one.name, two.address, three.phone + FROM census one, building_directory two, phonebook three +WHERE one.id = two.id and two.id = three.id; + +-- The aliases c1 and c2 let the query handle columns with the same names from 2 joined tables. +-- The aliases t1 and t2 let the query abbreviate references to long or cryptically named tables. +SELECT t1.column_n AS c1, t2.column_n AS c2 FROM long_name_table AS t1, very_long_name_table2 AS t2 + WHERE c1 = c2; +SELECT t1.column_n c1, t2.column_n c2 FROM table1 t1, table2 t2 + WHERE c1 = c2; +</codeblock> + + <p> + To use an alias name that matches one of the Impala reserved keywords (listed in + <xref href="impala_reserved_words.xml#reserved_words"/>), surround the identifier with either single or + double quotation marks, or <codeph>``</codeph> characters (backticks). + </p> + + <p> + <ph conref="../shared/impala_common.xml#common/aliases_vs_identifiers"/> + </p> + + <p conref="../shared/impala_common.xml#common/complex_types_blurb"/> + + <p rev="2.3.0"> + Queries involving the complex types (<codeph>ARRAY</codeph>, + <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>), typically make + extensive use of table aliases. These queries involve join clauses + where the complex type column is treated as a joined table. + To construct two-part or three-part qualified names for the + complex column elements in the <codeph>FROM</codeph> list, + sometimes it is syntactically required to construct a table + alias for the complex column where it is referenced in the join clause. + See <xref href="impala_complex_types.xml#complex_types"/> for details and examples. 
+ </p> + + <p> + <b>Alternatives:</b> + </p> + + <p conref="../shared/impala_common.xml#common/views_vs_identifiers"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_allow_unsupported_formats.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_allow_unsupported_formats.xml b/docs/topics/impala_allow_unsupported_formats.xml new file mode 100644 index 0000000..fbf758e --- /dev/null +++ b/docs/topics/impala_allow_unsupported_formats.xml @@ -0,0 +1,31 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="allow_unsupported_formats"> + + <title>ALLOW_UNSUPPORTED_FORMATS Query Option</title> + <titlealts audience="PDF"><navtitle>ALLOW_UNSUPPORTED_FORMATS</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Deprecated Features"/> + </metadata> + </prolog> + + <conbody> + +<!-- +The original brief explanation with not enough detail comes from the comments at: + http://github.sf.cloudera.com/CDH/Impala/raw/master/common/thrift/ImpalaService.thrift +Removing that wording from here after discussions with dev team. Just recording the URL for posterity. +--> + + <p> + An obsolete query option from early work on support for file formats. Do not use. Might be removed in the + future. 
+ </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_alter_function.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_alter_function.xml b/docs/topics/impala_alter_function.xml new file mode 100644 index 0000000..2ea0a26 --- /dev/null +++ b/docs/topics/impala_alter_function.xml @@ -0,0 +1,21 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept audience="Cloudera" rev="1.x" id="alter_function"> + + <title>ALTER FUNCTION Statement</title> + <titlealts audience="PDF"><navtitle>ALTER FUNCTION</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p/> + </conbody> +</concept>
