http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_shell_commands.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_shell_commands.xml b/docs/topics/impala_shell_commands.xml new file mode 100644 index 0000000..6d6f720 --- /dev/null +++ b/docs/topics/impala_shell_commands.xml @@ -0,0 +1,399 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="shell_commands"> + + <title>impala-shell Command Reference</title> + <titlealts audience="PDF"><navtitle>Command Reference</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="impala-shell"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">impala-shell</indexterm> + Use the following commands within <codeph>impala-shell</codeph> to pass requests to the + <codeph>impalad</codeph> daemon that the shell is connected to. You can enter a command interactively at the + prompt, or pass it as the argument to the <codeph>-q</codeph> option of <codeph>impala-shell</codeph>. Most + of these commands are passed to the Impala daemon as SQL statements; refer to the corresponding + <xref href="impala_langref_sql.xml#langref_sql">SQL language reference sections</xref> for full syntax + details. + </p> + + <table> + <tgroup cols="2"> + <colspec colname="1" colwidth="10*"/> + <colspec colname="2" colwidth="40*"/> + <thead> + <row> + <entry> + Command + </entry> + <entry> + Explanation + </entry> + </row> + </thead> + <tbody> + <row id="alter_cmd"> + <entry> + <p> + <codeph>alter</codeph> + </p> + </entry> + <entry> + <p> + Changes the underlying structure or settings of an Impala table, or a table shared between Impala + and Hive. 
See <xref href="impala_alter_table.xml#alter_table"/> and + <xref href="impala_alter_view.xml#alter_view"/> for details. + </p> + </entry> + </row> + <row rev="1.2.2" id="compute_cmd"> + <entry> + <p> + <codeph>compute stats</codeph> + </p> + </entry> + <entry> + <p> + Gathers important performance-related information for a table, used by Impala to optimize queries. + See <xref href="impala_compute_stats.xml#compute_stats"/> for details. + </p> + </entry> + </row> + <row id="connect_cmd"> + <entry> + <p> + <codeph>connect</codeph> + </p> + </entry> + <entry> + <p> + Connects to the specified instance of <codeph>impalad</codeph>. The default port of 21000 is + assumed unless you provide another value. You can connect to any host in your cluster that is + running <codeph>impalad</codeph>. If you connect to an instance of <codeph>impalad</codeph> that + was started with an alternate port specified by the <codeph>--fe_port</codeph> flag, you must + provide that alternate port. See <xref href="impala_connecting.xml#connecting"/> for examples. + </p> + + <p conref="../shared/impala_common.xml#common/set_vs_connect"/> + </entry> + </row> + <row id="describe_cmd"> + <entry> + <p> + <codeph>describe</codeph> + </p> + </entry> + <entry> + <p> + Shows the columns, column data types, and any column comments for a specified table. + <codeph>DESCRIBE FORMATTED</codeph> shows additional information such as the HDFS data directory, + partitions, and internal properties for the table. See <xref href="impala_describe.xml#describe"/> + for details about the basic <codeph>DESCRIBE</codeph> output and the <codeph>DESCRIBE + FORMATTED</codeph> variant. You can use <codeph>DESC</codeph> as shorthand for the + <codeph>DESCRIBE</codeph> command. + </p> + </entry> + </row> + <row id="drop_cmd"> + <entry> + <p> + <codeph>drop</codeph> + </p> + </entry> + <entry> + <p> + Removes a schema object, and in some cases its associated data files. 
See + <xref href="impala_drop_table.xml#drop_table"/>, <xref href="impala_drop_view.xml#drop_view"/>, + <xref href="impala_drop_database.xml#drop_database"/>, and + <xref href="impala_drop_function.xml#drop_function"/> for details. + </p> + </entry> + </row> + <row id="explain_cmd"> + <entry> + <p> + <codeph>explain</codeph> + </p> + </entry> + <entry> + <p> + Provides the execution plan for a query. <codeph>EXPLAIN</codeph> represents a query as a series of + steps. For example, these steps might be map/reduce stages, metastore operations, or file system + operations such as move or rename. See <xref href="impala_explain.xml#explain"/> and + <xref href="impala_explain_plan.xml#perf_explain"/> for details. + </p> + </entry> + </row> + <row id="help_cmd"> + <entry> + <p> + <codeph>help</codeph> + </p> + </entry> + <entry> + <p> + Help provides a list of all available commands and options. + </p> + </entry> + </row> + <row id="history_cmd"> + <entry> + <p> + <codeph>history</codeph> + </p> + </entry> + <entry> + <p> + Maintains an enumerated cross-session command history. This history is stored in the + <filepath>~/.impalahistory</filepath> file. + </p> + </entry> + </row> + <row id="insert_cmd"> + <entry> + <p> + <codeph>insert</codeph> + </p> + </entry> + <entry> + <p> + Writes the results of a query to a specified table. This either overwrites table data or appends + data to the existing table content. See <xref href="impala_insert.xml#insert"/> for details. + </p> + </entry> + </row> + <row id="invalidate_metadata_cmd"> + <entry> + <p> + <codeph>invalidate metadata</codeph> + </p> + </entry> + <entry> + <p> + Updates <cmdname>impalad</cmdname> metadata for table existence and structure. Use this command + after creating, dropping, or altering databases, tables, or partitions in Hive. See + <xref href="impala_invalidate_metadata.xml#invalidate_metadata"/> for details. 
+ </p> + </entry> + </row> + <row id="profile_cmd"> + <entry> + <p> + <codeph>profile</codeph> + </p> + </entry> + <entry> + <p> + Displays low-level information about the most recent query. Used for performance diagnosis and + tuning. <ph rev="1.4.0"> The report starts with the same information as produced by the + <codeph>EXPLAIN</codeph> statement and the <codeph>SUMMARY</codeph> command.</ph> See + <xref href="impala_explain_plan.xml#perf_profile"/> for details. + </p> + </entry> + </row> + <row id="quit_cmd"> + <entry> + <p> + <codeph>quit</codeph> + </p> + </entry> + <entry> + <p> + Exits the shell. Remember to include the final semicolon so that the shell recognizes the end of + the command. + </p> + </entry> + </row> + <row id="refresh_cmd"> + <entry> + <p> + <codeph>refresh</codeph> + </p> + </entry> + <entry> + <p> + Refreshes <cmdname>impalad</cmdname> metadata for the locations of HDFS blocks corresponding to + Impala data files. Use this command after loading new data files into an Impala table through Hive + or through HDFS commands. See <xref href="impala_refresh.xml#refresh"/> for details. + </p> + </entry> + </row> + <row id="select_cmd"> + <entry> + <p> + <codeph>select</codeph> + </p> + </entry> + <entry> + <p> + Specifies the data set on which to complete some action. All information returned from + <codeph>select</codeph> can be sent to some output such as the console or a file or can be used to + complete some other element of a query. See <xref href="impala_select.xml#select"/> for details. + </p> + </entry> + </row> + <row id="set_cmd"> + <entry> + <p> + <codeph>set</codeph> + </p> + </entry> + <entry> + <p> + Manages query options for an <cmdname>impala-shell</cmdname> session. The available options are the + ones listed in <xref href="impala_query_options.xml#query_options"/>. These options are used for + query tuning and troubleshooting. 
Issue <codeph>SET</codeph> with no arguments to see the current + query options, either based on the <cmdname>impalad</cmdname> defaults, as specified by you at + <cmdname>impalad</cmdname> startup, or based on earlier <codeph>SET</codeph> statements in the same + session. To modify option values, issue commands with the syntax <codeph>set + <varname>option</varname>=<varname>value</varname></codeph>. To restore an option to its default, + use the <codeph>unset</codeph> command. Some options take Boolean values of <codeph>true</codeph> + and <codeph>false</codeph>. Others take numeric arguments, or quoted string values. + </p> + + <p conref="../shared/impala_common.xml#common/set_vs_connect"/> + + <p rev="2.0.0"> + In Impala 2.0 and later, <codeph>SET</codeph> is available as a SQL statement for any kind of + application, not only through <cmdname>impala-shell</cmdname>. See + <xref href="impala_set.xml#set"/> for details. + </p> + + <p rev="2.5.0 IMPALA-2180"> + In Impala 2.5 and later, you can use <codeph>SET</codeph> to define your own substitution variables + within an <cmdname>impala-shell</cmdname> session. + Within a SQL statement, you substitute the value by using the notation <codeph>${var:<varname>variable_name</varname>}</codeph>. + </p> + </entry> + </row> + <row id="shell_cmd"> + <entry> + <p> + <codeph>shell</codeph> + </p> + </entry> + <entry> + <p> + Executes the specified command in the operating system shell without exiting + <codeph>impala-shell</codeph>. You can use the <codeph>!</codeph> character as shorthand for the + <codeph>shell</codeph> command. + </p> + + <note> + Quote any instances of the <codeph>--</codeph> or <codeph>/*</codeph> tokens to avoid them being + interpreted as the start of a comment. To embed comments within <codeph>source</codeph> or + <codeph>!</codeph> commands, use the shell comment character <codeph>#</codeph> before the comment + portion of the line. 
+ </note> + </entry> + </row> + <row id="show_cmd"> + <entry> + <p> + <codeph>show</codeph> + </p> + </entry> + <entry> + <p> + Displays metastore data for schema objects created and accessed through Impala, Hive, or both. + <codeph>show</codeph> can be used to gather information about objects such as databases, tables, and functions. + See <xref href="impala_show.xml#show"/> for details. + </p> + </entry> + </row> + <row id="source_cmd" rev="IMPALA-3397 CDH-40097"> + <entry> + <p> + <codeph>source</codeph> or <codeph>src</codeph> + </p> + </entry> + <entry> + <p> + Executes one or more statements residing in a specified file from the local filesystem. + Allows you to perform the same kinds of batch operations as with the <codeph>-f</codeph> option, + but interactively within the interpreter. The file can contain SQL statements and other + <cmdname>impala-shell</cmdname> commands, including additional <codeph>SOURCE</codeph> commands + to perform a flexible sequence of actions. Each command or statement, except the last one in the file, + must end with a semicolon. + See <xref href="impala_shell_running_commands.xml#shell_running_commands"/> for examples. + </p> + </entry> + </row> + <row rev="1.4.0" id="summary_cmd"> + <entry> + <p> + <codeph>summary</codeph> + </p> + </entry> + <entry> + <p> + Summarizes the work performed in various stages of a query. It provides a higher-level view of the + information displayed by the <codeph>EXPLAIN</codeph> command. Added in Impala 1.4.0. See + <xref href="impala_explain_plan.xml#perf_summary"/> for details about the report format + and how to interpret it. + </p> + <p rev="2.3.0"> + In CDH 5.5 / Impala 2.3 and higher, you can see a continuously updated report of + the summary information while a query is in progress. + See <xref href="impala_live_summary.xml#live_summary"/> for details. 
+ </p> + </entry> + </row> + <row id="unset_cmd"> + <entry> + <p> + <codeph>unset</codeph> + </p> + </entry> + <entry> + <p> + Removes any user-specified value for a query option and returns the option to its default value. + See <xref href="impala_query_options.xml#query_options"/> for the available query options. + </p> + <p rev="2.5.0 IMPALA-2180"> + In CDH 5.7 / Impala 2.5 and higher, it can also remove user-specified substitution variables + using the notation <codeph>UNSET VAR:<varname>variable_name</varname></codeph>. + </p> + </entry> + </row> + <row id="use_cmd"> + <entry> + <p> + <codeph>use</codeph> + </p> + </entry> + <entry> + <p> + Indicates the database against which to execute subsequent commands. Lets you avoid using fully + qualified names when referring to tables in databases other than <codeph>default</codeph>. See + <xref href="impala_use.xml#use"/> for details. Not effective with the <codeph>-q</codeph> option, + because that option only allows a single statement in the argument. + </p> + </entry> + </row> + <row id="version_cmd"> + <entry> + <p> + <codeph>version</codeph> + </p> + </entry> + <entry> + <p> + Returns Impala version information. + </p> + </entry> + </row> + </tbody> + </tgroup> + </table> + </conbody> +</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_shell_running_commands.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_shell_running_commands.xml b/docs/topics/impala_shell_running_commands.xml new file mode 100644 index 0000000..013b23d --- /dev/null +++ b/docs/topics/impala_shell_running_commands.xml @@ -0,0 +1,265 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="shell_running_commands"> + + <title>Running Commands and SQL Statements in impala-shell</title> + <titlealts audience="PDF"><navtitle>Running Commands and SQL Statements</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="impala-shell"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + </metadata> + </prolog> + + <conbody> + + <p> + For information on available commands, see + <xref href="impala_shell_commands.xml#shell_commands"/>. You can see the full set of available + commands by pressing TAB twice, for example: + </p> + +<codeblock>[impalad-host:21000] > +connect describe explain help history insert quit refresh select set shell show use version +[impalad-host:21000] ></codeblock> + + <note> + Commands must be terminated by a semicolon. A command can span multiple lines. 
+ </note> + + <p> + For example: + </p> + +<codeblock>[localhost:21000] > select * + > from t1 + > limit 5; ++---------+-----------+ +| s1 | s2 | ++---------+-----------+ +| hello | world | +| goodbye | cleveland | ++---------+-----------+ +</codeblock> + + <p> + A comment is considered part of the statement it precedes, so when you enter a <codeph>--</codeph> or + <codeph>/* */</codeph> comment, you get a continuation prompt until you finish entering a statement ending + with a semicolon: + </p> + +<codeblock>[localhost:21000] > -- This is a test comment + > show tables like 't*'; ++--------+ +| name | ++--------+ +| t1 | +| t2 | +| tab1 | +| tab2 | +| tab3 | +| text_t | ++--------+ +</codeblock> + + <p> + Use the up-arrow and down-arrow keys to cycle through and edit previous commands. + <cmdname>impala-shell</cmdname> uses the <codeph>readline</codeph> library and so supports a standard set of + keyboard shortcuts for editing and cursor movement, such as <codeph>Ctrl-A</codeph> for beginning of line and + <codeph>Ctrl-E</codeph> for end of line. + </p> + + <p rev="2.5.0 IMPALA-2179 IMPALA-2180"> + In CDH 5.7 / Impala 2.5 and higher, you can define substitution variables to be used within SQL statements + processed by <cmdname>impala-shell</cmdname>. On the command line, you specify the option + <codeph>--var=<varname>variable_name</varname>=<varname>value</varname></codeph>. + Within an interactive session or a script file processed by the <codeph>-f</codeph> option, you specify + a <codeph>SET</codeph> command using the notation <codeph>SET VAR:<varname>variable_name</varname>=<varname>value</varname></codeph>. + Within a SQL statement, you substitute the value by using the notation <codeph>${var:<varname>variable_name</varname>}</codeph>. 
+ </p> + + <note> + Because this feature is part of <cmdname>impala-shell</cmdname> rather than the <cmdname>impalad</cmdname> + backend, make sure the client system you are connecting from has the most recent <cmdname>impala-shell</cmdname>. + You can use this feature with a new <cmdname>impala-shell</cmdname> connecting to an older <cmdname>impalad</cmdname>, + but not the reverse. + </note> + + <p rev="2.5.0 IMPALA-2179 IMPALA-2180"> + For example, here are some <cmdname>impala-shell</cmdname> commands that define substitution variables and then + use them in SQL statements executed through the <codeph>-q</codeph> and <codeph>-f</codeph> options. + Notice how the <codeph>-q</codeph> argument strings are single-quoted to prevent shell expansion of the + <codeph>${var:value}</codeph> notation, and any string literals within the queries are enclosed by double quotation marks. + </p> + +<codeblock rev="2.5.0 IMPALA-2179 IMPALA-2180"> +$ impala-shell --var=tname=table1 --var=colname=x --var=coltype=string -q 'create table ${var:tname} (${var:colname} ${var:coltype}) stored as parquet' +Starting Impala Shell without Kerberos authentication +Connected to <varname>hostname</varname> +Server version: <varname>impalad_version</varname> +Query: create table table1 (x string) stored as parquet + +$ NEW_STRING="hello world" +$ impala-shell --var=tname=table1 --var=insert_val="$NEW_STRING" -q 'insert into ${var:tname} values ("${var:insert_val}")' +Starting Impala Shell without Kerberos authentication +Connected to <varname>hostname</varname> +Server version: <varname>impalad_version</varname> +Query: insert into table1 values ("hello world") +Inserted 1 row(s) in 1.40s + +$ for VAL in foo bar bletch +do + impala-shell --var=tname=table1 --var=insert_val="$VAL" -q 'insert into ${var:tname} values ("${var:insert_val}")' +done +... 
+Query: insert into table1 values ("foo") +Inserted 1 row(s) in 0.22s +Query: insert into table1 values ("bar") +Inserted 1 row(s) in 0.11s +Query: insert into table1 values ("bletch") +Inserted 1 row(s) in 0.21s + +$ echo "Search for what substring?" ; read answer +Search for what substring? +b +$ impala-shell --var=tname=table1 --var=answer="$answer" -q 'select x from ${var:tname} where x like "%${var:answer}%"' +Starting Impala Shell without Kerberos authentication +Connected to <varname>hostname</varname> +Server version: <varname>impalad_version</varname> +Query: select x from table1 where x like "%b%" ++--------+ +| x | ++--------+ +| bletch | +| bar | ++--------+ +Fetched 2 row(s) in 0.83s +</codeblock> + + <p rev="2.5.0 IMPALA-2179 IMPALA-2180"> + Here is a substitution variable passed in by the <codeph>--var</codeph> option, + and then referenced by statements issued interactively. Then the variable is + cleared with the <codeph>UNSET</codeph> command, and defined again with the + <codeph>SET</codeph> command. 
+ </p> + +<codeblock rev="2.5.0 IMPALA-2179 IMPALA-2180"> +$ impala-shell --quiet --var=tname=table1 +Starting Impala Shell without Kerberos authentication +*********************************************************************************** +<varname>banner_message</varname> +*********************************************************************************** +[<varname>hostname</varname>:21000] > select count(*) from ${var:tname}; ++----------+ +| count(*) | ++----------+ +| 4 | ++----------+ +[<varname>hostname</varname>:21000] > unset var:tname; +Unsetting variable TNAME +[<varname>hostname</varname>:21000] > select count(*) from ${var:tname}; +Error: Unknown variable TNAME +[<varname>hostname</varname>:21000] > set var:tname=table1; +[<varname>hostname</varname>:21000] > select count(*) from ${var:tname}; ++----------+ +| count(*) | ++----------+ +| 4 | ++----------+ +</codeblock> + + <p rev="IMPALA-3397 CDH-40097"> + The following example shows how the <codeph>SOURCE</codeph> command can execute + a series of statements from a file: + </p> + +<codeblock rev="IMPALA-3397 CDH-40097"> +$ cat commands.sql +show databases; +show tables in default; +show functions in _impala_builtins like '*minute*'; + +$ impala-shell -i localhost +... 
+[localhost:21000] > source commands.sql; +Query: show databases ++------------------+----------------------------------------------+ +| name | comment | ++------------------+----------------------------------------------+ +| _impala_builtins | System database for Impala builtin functions | +| default | Default Hive database | ++------------------+----------------------------------------------+ +Fetched 2 row(s) in 0.06s +Query: show tables in default ++-----------+ +| name | ++-----------+ +| customers | +| sample_07 | +| sample_08 | +| web_logs | ++-----------+ +Fetched 4 row(s) in 0.02s +Query: show functions in _impala_builtins like '*minute*' ++-------------+--------------------------------+-------------+---------------+ +| return type | signature | binary type | is persistent | ++-------------+--------------------------------+-------------+---------------+ +| INT | minute(TIMESTAMP) | BUILTIN | true | +| TIMESTAMP | minutes_add(TIMESTAMP, BIGINT) | BUILTIN | true | +| TIMESTAMP | minutes_add(TIMESTAMP, INT) | BUILTIN | true | +| TIMESTAMP | minutes_sub(TIMESTAMP, BIGINT) | BUILTIN | true | +| TIMESTAMP | minutes_sub(TIMESTAMP, INT) | BUILTIN | true | ++-------------+--------------------------------+-------------+---------------+ +Fetched 5 row(s) in 0.03s +</codeblock> + + <p rev="IMPALA-3397 CDH-40097"> + The following example shows how a file that is run by the <codeph>SOURCE</codeph> command, + or through the <codeph>-q</codeph> or <codeph>-f</codeph> options of <cmdname>impala-shell</cmdname>, + can contain additional <codeph>SOURCE</codeph> commands. + The first file, <filepath>nested1.sql</filepath>, runs an <cmdname>impala-shell</cmdname> command + and then also runs the commands from <filepath>nested2.sql</filepath>. + This ability for scripts to call each other is often useful for code that sets up schemas for applications + or test environments. 
+ </p> + +<codeblock rev="IMPALA-3397 CDH-40097"> +$ cat nested1.sql +show functions in _impala_builtins like '*minute*'; +source nested2.sql +$ cat nested2.sql +show functions in _impala_builtins like '*hour*' + +$ impala-shell -i localhost -f nested1.sql +Starting Impala Shell without Kerberos authentication +Connected to localhost:21000 +... +Query: show functions in _impala_builtins like '*minute*' ++-------------+--------------------------------+-------------+---------------+ +| return type | signature | binary type | is persistent | ++-------------+--------------------------------+-------------+---------------+ +| INT | minute(TIMESTAMP) | BUILTIN | true | +| TIMESTAMP | minutes_add(TIMESTAMP, BIGINT) | BUILTIN | true | +| TIMESTAMP | minutes_add(TIMESTAMP, INT) | BUILTIN | true | +| TIMESTAMP | minutes_sub(TIMESTAMP, BIGINT) | BUILTIN | true | +| TIMESTAMP | minutes_sub(TIMESTAMP, INT) | BUILTIN | true | ++-------------+--------------------------------+-------------+---------------+ +Fetched 5 row(s) in 0.01s +Query: show functions in _impala_builtins like '*hour*' ++-------------+------------------------------+-------------+---------------+ +| return type | signature | binary type | is persistent | ++-------------+------------------------------+-------------+---------------+ +| INT | hour(TIMESTAMP) | BUILTIN | true | +| TIMESTAMP | hours_add(TIMESTAMP, BIGINT) | BUILTIN | true | +| TIMESTAMP | hours_add(TIMESTAMP, INT) | BUILTIN | true | +| TIMESTAMP | hours_sub(TIMESTAMP, BIGINT) | BUILTIN | true | +| TIMESTAMP | hours_sub(TIMESTAMP, INT) | BUILTIN | true | ++-------------+------------------------------+-------------+---------------+ +Fetched 5 row(s) in 0.01s +</codeblock> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_ssl.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_ssl.xml b/docs/topics/impala_ssl.xml new file mode 100644 
index 0000000..56e86a0 --- /dev/null +++ b/docs/topics/impala_ssl.xml @@ -0,0 +1,256 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="ssl"> + + <title id="tls">Configuring TLS/SSL for Impala</title> + + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Security"/> + <data name="Category" value="SSL"/> + <data name="Category" value="Encryption"/> + <data name="Category" value="Configuring"/> + <data name="Category" value="Administrators"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">SSL</indexterm> + Impala supports TLS/SSL network encryption, between Impala and client programs, and between the Impala-related daemons running on + different nodes in the cluster. This feature is important when you also use other features such as Kerberos authentication or Sentry + authorization, where credentials are being transmitted back and forth. + <note conref="../shared/CDHVariables.xml#xd_583c10bfdbd326ba-3ca24a24-13d80143249--7f9a/CMCDH_EitherOK" + /> + </p> + + </conbody> + + <concept id="concept_gnk_2tt_qp"> + + <title>Using Cloudera Manager</title> + + <prolog> + <metadata> + <data name="Category" value="Cloudera Manager"/> + </metadata> + </prolog> + + <conbody> + + <p> + To configure Impala to listen for Beeswax and HiveServer2 requests on TLS/SSL-secured ports: + <ol id="ol_rnf_ftt_qp"> + <li> + Open the Cloudera Manager Admin Console and go to the <uicontrol>Impala</uicontrol> service. + </li> + + <li + conref="../shared/cm_common_elements.xml#cm/config_edit"/> + + <li> + Select <menucascade><uicontrol>Scope</uicontrol><uicontrol>Impala (Service-Wide)</uicontrol></menucascade>. + </li> + + <li> + Select <menucascade><uicontrol>Category</uicontrol><uicontrol>Security</uicontrol></menucascade>. 
+ </li> + + <li> + Edit the following properties: + <table frame="all" + id="table_drf_ftt_qp"> + <title>Impala SSL Properties</title> + <tgroup cols="2"> + <colspec colname="c1" colnum="1" colwidth="1*"/> + <colspec colname="c2" colnum="2" colwidth="2.5*"/> + <thead> + <row> + <entry> + Property + </entry> + <entry> + Description + </entry> + </row> + </thead> + <tbody> + <row> + <entry> + <b>Enable TLS/SSL for Impala Client Services</b> + </entry> + <entry> + Encrypt communication between clients (like ODBC, JDBC, and the Impala shell) and the Impala daemon using Transport + Layer Security (TLS) (formerly known as Secure Socket Layer (SSL)). + </entry> + </row> + <row> + <entry> + <b>SSL/TLS Certificate for Clients</b> + </entry> + <entry> + Local path to the X509 certificate that identifies the Impala daemon to clients during TLS/SSL connections. This + file must be in PEM format. + </entry> + </row> + <row> + <entry> + <b>SSL/TLS Private Key for Clients</b> + </entry> + <entry> + Local path to the private key that matches the certificate specified in the Certificate for Clients. This file must be + in PEM format. + </entry> + </row> + <row> + <entry> + <b>SSL/TLS Private Key Password for Clients</b> + </entry> + <entry> + A shell command for Impala to run on startup to retrieve the password for a password-protected private key file. + The output of the command is truncated to a maximum of 1024 bytes, and any trailing whitespace (such as spaces + or newline characters) is trimmed. If the command exits with an error, Impala does not start. If the password + is incorrect, clients cannot connect to the server regardless of whether the public key is correct. + </entry> + </row> + <row> + <entry> + <b>SSL/TLS CA Certificate</b> + </entry> + <entry> + Must be specified for TLS/SSL encryption to be enabled for communication + between internal Impala components. 
+ </entry> + </row> + <row> + <entry> + <b>SSL/TLS Certificate for <varname>Impala component</varname> Webserver</b> + </entry> + <entry> + There are three of these configuration settings, one each for <q>Impala Daemon</q>, + <q>Catalog Server</q>, and <q>Statestore</q>. + Each of these Impala components has its own internal web server that powers the + associated web UI with diagnostic information. + The configuration setting represents the local path to the X509 certificate that + identifies the web server to clients during TLS/SSL connections. This + file must be in PEM format. + </entry> + </row> + </tbody> + </tgroup> + </table> + </li> + + <li conref="../shared/cm_common_elements.xml#cm/save_changes_short"/> + + <li> + Restart the Impala service. + </li> + </ol> + </p> + + <p> + For information on configuring TLS/SSL communication with the <codeph>impala-shell</codeph> interpreter, see + <xref href="#concept_q1p_j2d_rp/secref"/>. + </p> + + </conbody> + + </concept> + + <concept id="concept_q1p_j2d_rp"> + + <title>Using the Command Line</title> + + <conbody> + +<!-- +Info from Henry, from https://docs.google.com/a/cloudera.com/document/d/1u00CJ8WRzXR-1AK_WnQlR6LMtY-7Rc3eHaKNgw3IZvA/edit +--> + + <p> + To enable SSL for connections from client applications to Impala, add both of the following flags to the <cmdname>impalad</cmdname> startup options: + </p> + + <ul id="ul_i2p_m2d_rp"> + <li> + <codeph>--ssl_server_certificate</codeph>: the full path to the server certificate, on the local filesystem. + </li> + + <li> + <codeph>--ssl_private_key</codeph>: the full path to the server private key, on the local filesystem. + </li> + </ul> + + <p rev="2.3.0"> + In CDH 5.5 / Impala 2.3 and higher, Impala can also use SSL for its own internal communication between the + <cmdname>impalad</cmdname>, <codeph>statestored</codeph>, and <codeph>catalogd</codeph> daemons. 
+ To enable this additional SSL encryption, set the <codeph>--ssl_server_certificate</codeph> + and <codeph>--ssl_private_key</codeph> flags in the startup options for + <cmdname>impalad</cmdname>, <cmdname>catalogd</cmdname>, and <cmdname>statestored</cmdname>, + and also add the <codeph>--ssl_client_ca_certificate</codeph> flag for all three of those daemons. + </p> + + <note conref="../shared/impala_common.xml#common/impala_kerberos_ssl_caveat"/> + + <p> + If either of the <codeph>--ssl_server_certificate</codeph> and <codeph>--ssl_private_key</codeph> flags is set, both must be set. In that case, Impala starts listening for Beeswax and HiveServer2 requests on + SSL-secured ports only. (The port numbers stay the same; see <xref href="impala_ports.xml#ports"/> for details.) + </p> + + <p> + Since Impala uses passphrase-less certificates in PEM format, you can reuse a host's existing Java keystore by converting it to the + PEM format. For instructions, see + <xref audience="integrated" href="cm_sg_openssl_jks.xml#concept_ek3_sdl_rp"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_sg_openssl_jks.html" scope="external" format="html"/>. + </p> + + <section id="secref"> + + <title>Configuring TLS/SSL Communication for the Impala Shell</title> + + <p> + Typically, a client program has corresponding configuration properties in Cloudera Manager to verify that it is connecting to the + right server. For example, with SSL enabled for Impala, you use the following options when starting the + <cmdname>impala-shell</cmdname> interpreter: + </p> + + <ul id="ul_kgp_m2d_rp"> + <li> + <codeph>--ssl</codeph>: enables TLS/SSL for <cmdname>impala-shell</cmdname>. + </li> + + <li> + <codeph>--ca_cert</codeph>: the local pathname pointing to the third-party CA certificate, or to a copy of the server + certificate for self-signed server certificates. 
+ </li> + </ul> + + <p> + If <codeph>--ca_cert</codeph> is not set, <cmdname>impala-shell</cmdname> enables TLS/SSL, but does not validate the server + certificate. This is useful for connecting to a known-good Impala that is only running over TLS/SSL, when a copy of the + certificate is not available (such as when debugging customer installations). + </p> + + </section> + + </conbody> + + </concept> + + <concept id="ssl_jdbc_odbc"> + <title>Using TLS/SSL with Business Intelligence Tools</title> + <conbody> + <p> + You can use Kerberos authentication, TLS/SSL encryption, or both to secure + connections from JDBC and ODBC applications to Impala. + See <xref href="impala_jdbc.xml#impala_jdbc"/> and <xref href="impala_odbc.xml#impala_odbc"/> + for details. + </p> + + <p conref="../shared/impala_common.xml#common/hive_jdbc_ssl_kerberos_caveat"/> + </conbody> + </concept> + +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_troubleshooting.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_troubleshooting.xml b/docs/topics/impala_troubleshooting.xml new file mode 100644 index 0000000..f7ebe4e --- /dev/null +++ b/docs/topics/impala_troubleshooting.xml @@ -0,0 +1,447 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="troubleshooting"> + + <title>Troubleshooting Impala</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Troubleshooting"/> + <data name="Category" value="Administrators"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">troubleshooting</indexterm> + Troubleshooting for Impala requires being able to diagnose and debug problems + with performance, network connectivity, out-of-memory conditions, disk 
space usage, + and crash or hang conditions in any of the Impala-related daemons. + </p> + + <p outputclass="toc inpage" audience="PDF"> + The following sections describe the general troubleshooting procedures to diagnose + different kinds of problems: + </p> + + </conbody> + + <concept id="trouble_sql"> + + <title>Troubleshooting Impala SQL Syntax Issues</title> + + <conbody> + + <p> + In general, if queries issued against Impala fail, you can try running these same queries against Hive. + </p> + + <ul> + <li> + If a query fails against both Impala and Hive, it is likely that there is a problem with your query or + other elements of your CDH environment: + <ul> + <li> + Review the <xref href="impala_langref.xml#langref">Language Reference</xref> to ensure your query is + valid. + </li> + + <li> + Check <xref href="impala_reserved_words.xml#reserved_words"/> to see if any database, table, + column, or other object names in your query conflict with Impala reserved words. + Quote those names with backticks (<codeph>``</codeph>) if so. + </li> + + <li> + Check <xref href="impala_functions.xml#builtins"/> to confirm whether Impala supports all the + built-in functions being used by your query, and whether argument and return types are the + same as you expect. + </li> + + <li> + Review the <xref href="impala_logging.xml#logs_debug">contents of the Impala logs</xref> for any information that may be useful in identifying the + source of the problem. + </li> + </ul> + </li> + + <li> + If a query fails against Impala but not Hive, it is likely that there is a problem with your Impala + installation. + </li> + </ul> + </conbody> + </concept> + + <concept id="trouble_io" rev="CDH-19201"> + <title>Troubleshooting I/O Capacity Problems</title> + <conbody> + <p> + Impala queries are typically I/O-intensive. If there is an I/O problem with storage devices, + or with HDFS itself, Impala queries could show slow response times with no obvious cause + on the Impala side. 
Slow I/O on even a single DataNode could result in an overall slowdown, because + queries involving clauses such as <codeph>ORDER BY</codeph>, <codeph>GROUP BY</codeph>, or <codeph>JOIN</codeph> + do not start returning results until all DataNodes have finished their work. + </p> + <p> + To test whether the Linux I/O system itself is performing as expected, run Linux commands like + the following on each DataNode: + </p> +<codeblock> +$ sudo sysctl -w vm.drop_caches=3 vm.drop_caches=0 +vm.drop_caches = 3 +vm.drop_caches = 0 +$ sudo dd if=/dev/sda bs=1M of=/dev/null count=1k +1024+0 records in +1024+0 records out +1073741824 bytes (1.1 GB) copied, 5.60373 s, 192 MB/s +$ sudo dd if=/dev/sdb bs=1M of=/dev/null count=1k +1024+0 records in +1024+0 records out +1073741824 bytes (1.1 GB) copied, 5.51145 s, 195 MB/s +$ sudo dd if=/dev/sdc bs=1M of=/dev/null count=1k +1024+0 records in +1024+0 records out +1073741824 bytes (1.1 GB) copied, 5.58096 s, 192 MB/s +$ sudo dd if=/dev/sdd bs=1M of=/dev/null count=1k +1024+0 records in +1024+0 records out +1073741824 bytes (1.1 GB) copied, 5.43924 s, 197 MB/s +</codeblock> + <p> + On modern hardware, a throughput rate of less than 100 MB/s typically indicates + a performance issue with the storage device. Correct the hardware problem before + continuing with Impala tuning or benchmarking. + </p> + </conbody> + </concept> + + + <concept id="trouble_cookbook"> + + <title>Impala Troubleshooting Quick Reference</title> + + <conbody> + + <p> + The following table lists common problems and potential solutions. + </p> + + <table> + <tgroup cols="3"> + <colspec colname="1" colwidth="10*"/> + <colspec colname="2" colwidth="30*"/> + <colspec colname="3" colwidth="30*"/> + <thead> + <row> + <entry> + Symptom + </entry> + <entry> + Explanation + </entry> + <entry> + Recommendation + </entry> + </row> + </thead> + <tbody> + <row> + <entry> + Impala takes a long time to start. 
+ </entry> + <entry> + Impala instances with large numbers of tables, partitions, or data files take longer to start + because the metadata for these objects is broadcast to all <cmdname>impalad</cmdname> nodes and + cached. + </entry> + <entry> + Adjust timeout and synchronicity settings. + </entry> + </row> + <row> + <entry> + <p> + Joins fail to complete. + </p> + </entry> + <entry> + <p> + There may be insufficient memory. During a join, data from the second, third, and so on sets to + be joined is loaded into memory. If Impala chooses an inefficient join order or join mechanism, + the query could exceed the total memory available. + </p> + </entry> + <entry> + <p> + Start by gathering statistics with the <codeph>COMPUTE STATS</codeph> statement for each table + involved in the join. Consider specifying the <codeph>[SHUFFLE]</codeph> hint so that data from + the joined tables is split up between nodes rather than broadcast to each node. If tuning at the + SQL level is not sufficient, add more memory to your system or join smaller data sets. + </p> + </entry> + </row> + <row> + <entry> + <p> + Queries return incorrect results. + </p> + </entry> + <entry> + <p> + Impala metadata may be outdated after changes are performed in Hive. + </p> + </entry> + <entry> + <p> + Where possible, use the appropriate Impala statement (<codeph>INSERT</codeph>, <codeph>LOAD + DATA</codeph>, <codeph>CREATE TABLE</codeph>, <codeph>ALTER TABLE</codeph>, <codeph>COMPUTE + STATS</codeph>, and so on) rather than switching back and forth between Impala and Hive. Impala + automatically broadcasts the results of DDL and DML operations to all Impala nodes in the + cluster, but does not automatically recognize when such changes are made through Hive. After + inserting data, adding a partition, or other operation in Hive, refresh the metadata for the + table as described in <xref href="impala_refresh.xml#refresh"/>. 
+ </p> + </entry> + </row> + <row> + <entry> + <p> + Queries are slow to return results. + </p> + </entry> + <entry> + <p> + Some <codeph>impalad</codeph> instances may not have started. Using a browser, connect to the + host running the Impala state store. Connect using an address of the form + <codeph>http://<varname>hostname</varname>:<varname>port</varname>/metrics</codeph>. + </p> + + <p> + <note> + Replace <varname>hostname</varname> and <varname>port</varname> with the hostname and port of + your Impala state store host machine and web server port. The default port is 25010. + </note> + The number of <codeph>impalad</codeph> instances listed should match the expected number of + <codeph>impalad</codeph> instances installed in the cluster. There should also be one + <codeph>impalad</codeph> instance installed on each DataNode. + </p> + </entry> + <entry> + <p> + Ensure Impala is installed on all DataNodes. Start any <codeph>impalad</codeph> instances that + are not running. + </p> + </entry> + </row> + <row> + <entry> + <p> + Queries are slow to return results. + </p> + </entry> + <entry> + <p> + Impala may not be configured to use native checksumming. Native checksumming uses + machine-specific instructions to compute checksums over HDFS data very quickly. Review Impala + logs. If you find instances of "<codeph>INFO util.NativeCodeLoader: Loaded the + native-hadoop</codeph>" messages, native checksumming is not enabled. + </p> + </entry> + <entry> + <p> + Ensure Impala is configured to use native checksumming as described in + <xref href="impala_config_performance.xml#config_performance"/>. + </p> + </entry> + </row> + <row> + <entry> + <p> + Queries are slow to return results. + </p> + </entry> + <entry> + <p> + Impala may not be configured to use data locality tracking. + </p> + </entry> + <entry> + <p> + Test Impala for data locality tracking and make configuration changes as necessary. 
Information + on this process can be found in <xref href="impala_config_performance.xml#config_performance"/>. + </p> + </entry> + </row> + <row> + <entry> + <p> + Attempts to complete Impala tasks such as executing INSERT-SELECT actions fail. The Impala logs + include notes that files could not be opened due to permission denied. + </p> + </entry> + <entry> + <p> + This can be the result of permissions issues. For example, you could use the Hive shell as the + hive user to create a table. After creating this table, you could attempt to complete some + action, such as an INSERT-SELECT on the table. Because the table was created using one user and + the INSERT-SELECT is attempted by another, this action may fail due to permissions issues. + </p> + </entry> + <entry> + <p> + In general, ensure the Impala user has sufficient permissions. In the preceding example, ensure + the Impala user has sufficient permissions to the table that the Hive user created. + </p> + </entry> + </row> + <row rev="IMP-1210"> + <entry> + <p> + Impala fails to start up, with the <cmdname>impalad</cmdname> logs referring to errors connecting + to the statestore service and attempts to re-register. + </p> + </entry> + <entry> + <p> + A large number of databases, tables, partitions, and so on can require metadata synchronization, + particularly on startup, that takes longer than the default timeout for the statestore service. + </p> + </entry> + <entry> + <p> + Configure the statestore timeout value and possibly other settings related to the frequency of + statestore updates and metadata loading. See + <xref href="impala_timeouts.xml#statestore_timeout"/> and + <xref href="impala_scalability.xml#statestore_scalability"/>. + </p> + </entry> + </row> + </tbody> + </tgroup> + </table> + + <p audience="Cloudera"> + Some or all of these settings might also be useful. 
+<codeblock>NUM_SCANNER_THREADS: 0 +ABORT_ON_DEFAULT_LIMIT_EXCEEDED: 0 +MAX_IO_BUFFERS: 0 +DEFAULT_ORDER_BY_LIMIT: -1 +BATCH_SIZE: 0 +NUM_NODES: 0 +DISABLE_CODEGEN: 0 +MAX_ERRORS: 0 +ABORT_ON_ERROR: 0 +MAX_SCAN_RANGE_LENGTH: 0 +ALLOW_UNSUPPORTED_FORMATS: 0 +SUPPORT_START_OVER: false +DEBUG_ACTION: +MEM_LIMIT: 0 +</codeblock> + </p> + </conbody> + </concept> + + <concept audience="Cloudera" id="core_dumps"> + + <title>Enabling Core Dumps for Impala</title> + + <conbody> + + <p> + Fill in details, then unhide. + </p> + + <p> + From Nong: + </p> + + <p> + In a CM-managed cluster, search for "core" from the Impala configuration page. You should see the "enable + core dump" config. + </p> + + <p> + From <xref href="impala_config_options.xml#config_options"/>: + </p> + +<codeblock>export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-false}</codeblock> + + <note conref="../shared/impala_common.xml#common/core_dump_considerations"/> + + <p></p> + </conbody> + </concept> + + <concept audience="Cloudera" id="io_throughput"> + <title>Verifying I/O Throughput</title> + <conbody> + <p> + Optimal Impala query performance depends on being able to perform I/O across multiple storage devices + in parallel, with the data transferred at or close to the maximum throughput for each device. + If a hardware or configuration issue causes a reduction in I/O throughput, even if the problem only + affects a subset of storage devices, you might experience + slow query performance that cannot be improved by using regular SQL tuning techniques. + </p> + <p> + As a general guideline, expect each commodity storage device (for example, a standard rotational + hard drive) to be able to transfer approximately 100 MB per second. If you see persistent slow query + performance, examine the Impala logs to check whether the I/O scan rate for each disk is noticeably + lower than expected. + </p> + +<codeblock> +<![CDATA[ +Useful test from beta at Visa. 
+SME: Jayant@ + +Symptoms: +* Queries running slow +* Scan rate of IO in Impala logs shows noticeably less than expected IO rate for each disk (typical commodity disk should provide ~100 MB/s) + +Actions: +* Validate disk read from OS to confirm no issue at hardware or OS level +* Validate disk read at HDFS to see if issue at HDFS config + +Specifics: +Testing Linux and hardware IO: +# First running: +sudo sysctl -w vm.drop_caches=3 vm.drop_caches=0 + +# Then Running: +sudo dd if=/dev/sda bs=1M of=/dev/null count=1k +& sudo dd if=/dev/sdb bs=1M of=/dev/null count=1k +& sudo dd if=/dev/sdc bs=1M of=/dev/null count=1k +& sudo dd if=/dev/sdd bs=1M of=/dev/null count=1k & wait + +Testing HDFS IO: +# You can use TestDFSIO. It's documented here: http://answers.oreilly.com/topic/460-how-to-benchmark-a-hadoop-cluster/ +# You can also use sar, dd and iostat for monitoring the disk. + +# writes 10 files each of 1000 MB +hadoop jar $HADOOP_INSTALL/hadoop-*-test.jar TestDFSIO -write -nrFiles 10 -fileSize 1000 + +# run the read benchmark +hadoop jar $HADOOP_INSTALL/hadoop-*-test.jar TestDFSIO -read -nrFiles 10 -fileSize 1000 + +# clean up the data +hadoop jar $HADOOP_INSTALL/hadoop-*-test.jar TestDFSIO -clean +]]> +</codeblock> + + </conbody> + </concept> + + <concept id="webui_snippet" audience="PDF"> + <title conref="impala_webui.xml#webui/webui_title"/> + <conbody> + <p conref="impala_webui.xml#webui/webui_intro"/> + <p> + For full details, see <xref href="impala_webui.xml#webui"/>. 
+ </p> + </conbody> + </concept> + +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fcc8cee/docs/topics/impala_webui.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_webui.xml b/docs/topics/impala_webui.xml new file mode 100644 index 0000000..38a5f00 --- /dev/null +++ b/docs/topics/impala_webui.xml @@ -0,0 +1,650 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="webui"> + + <title id="webui_title">Impala Web User Interface for Debugging</title> + <titlealts audience="PDF"><navtitle>Web User Interface</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Troubleshooting"/> + <data name="Category" value="Administrators"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p id="webui_intro"> + <indexterm audience="Cloudera">web UI</indexterm> + <indexterm audience="Cloudera">debug UI</indexterm> + Each of the Impala daemons (<cmdname>impalad</cmdname>, <cmdname>statestored</cmdname>, + and <cmdname>catalogd</cmdname>) includes a built-in web server that displays + diagnostic and status information: + <ul> + <li> + <p> + The <cmdname>impalad</cmdname> web UI (default port: 25000) includes + information about configuration settings, running and completed queries, and associated performance and + resource usage for queries. In particular, the <uicontrol>Details</uicontrol> link for each query displays + alternative views of the query including a graphical representation of the plan, and the + output of the <codeph>EXPLAIN</codeph>, <codeph>SUMMARY</codeph>, and <codeph>PROFILE</codeph> + statements from <cmdname>impala-shell</cmdname>. 
+ Each host that runs the <cmdname>impalad</cmdname> daemon has + its own instance of the web UI, with details about those queries for which that + host served as the coordinator. To get a consolidated view for all queries, + it is usually more convenient to use the charts, graphs, and other monitoring + features in Cloudera Manager. The <cmdname>impalad</cmdname> web UI is mainly + for diagnosing query problems that can be traced to a particular node. + </p> + </li> + <li> + <p> + The <cmdname>statestored</cmdname> web UI (default port: 25010) includes + information about memory usage, configuration settings, and ongoing health checks + performed by this daemon. Because there is only a single instance of this + daemon within any cluster, you view the web UI only on the particular host + that serves as the Impala Statestore. + </p> + </li> + <li> + <p> + The <cmdname>catalogd</cmdname> web UI (default port: 25020) includes + information about the databases, tables, and other objects managed by Impala, + in addition to the resource usage and configuration settings of the daemon itself. + The catalog information is represented as the underlying Thrift data structures. + Because there is only a single instance of this daemon within any cluster, you view the + web UI only on the particular host that serves as the Impala Catalog Server. + </p> + </li> + </ul> + </p> + + <note> + <p> + The web user interface is primarily for problem diagnosis and troubleshooting. The items listed and their + formats are subject to change. To monitor Impala health, particularly across the entire cluster at once, use + the Cloudera Manager interface. 
+ </p> + </note> + + <p outputclass="toc inpage"/> + </conbody> + + <concept id="webui_impalad"> + + <title>Debug Web UI for impalad</title> + + <conbody> + + <p> + To debug and troubleshoot the <cmdname>impalad</cmdname> daemon using a web-based interface, open the URL + <codeph>http://<varname>impala-server-hostname</varname>:25000/</codeph> in a browser. (For secure + clusters, use the prefix <codeph>https://</codeph> instead of <codeph>http://</codeph>.) Because each + Impala node produces its own set of debug information, choose a specific node that you are curious about or + suspect is having problems. + </p> + + <note> + To get a convenient picture of the health of all Impala nodes in a cluster, use the Cloudera Manager + interface, which collects the low-level operational information from all Impala nodes, and presents a + unified view of the entire cluster. + </note> + </conbody> + + <concept audience="Cloudera" id="webui_impalad_disabling"> + + <title>Turning off the Web UI for impalad</title> + + <conbody> + + <p></p> + </conbody> + </concept> + + <concept id="webui_impalad_main"> + + <title>Main Page</title> + + <conbody> + + <p> + By default, the main page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/</codeph> (secure cluster). + </p> + + <p> + This page lists the version of the <cmdname>impalad</cmdname> daemon, plus basic hardware and software + information about the corresponding host, such as information about the CPU, memory, disks, and operating + system version. 
+ </p> + </conbody> + </concept> + + <concept id="webui_impalad_backends"> + + <title>Backends Page</title> + + <conbody> + + <p> + By default, the <uicontrol>backends</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/backends</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/backends</codeph> (secure cluster). + </p> + + <p> + This page lists the host and port info for each of the <cmdname>impalad</cmdname> nodes in the cluster. + Because each <cmdname>impalad</cmdname> daemon knows about every other <cmdname>impalad</cmdname> daemon + through the statestore, this information should be the same regardless of which node you select. Links + take you to the corresponding debug web pages for any of the other nodes in the cluster. + </p> + </conbody> + </concept> + + <concept id="webui_impalad_catalog"> + + <title>Catalog Page</title> + + <conbody> + + <p> + By default, the <uicontrol>catalog</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/catalog</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/catalog</codeph> (secure cluster). + </p> + + <p> + This page displays a list of databases and associated tables recognized by this instance of + <cmdname>impalad</cmdname>. You can use this page to locate which database a table is in, check the + exact spelling of a database or table name, look for identical table names in multiple databases, and so + on. 
+ </p> + </conbody> + </concept> + + <concept id="webui_impalad_logs"> + + <title>Logs Page</title> + <prolog> + <metadata> + <data name="Category" value="Logs"/> + </metadata> + </prolog> + + <conbody> + + <p> + By default, the <uicontrol>logs</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/logs</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/logs</codeph> (secure cluster). + </p> + + <p> + This page shows the last portion of the <filepath>impalad.INFO</filepath> log file, the most detailed of + the info, warning, and error logs for the <cmdname>impalad</cmdname> daemon. You can refer here to see + the details of the most recent operations, whether the operations succeeded or encountered errors. This + central page can be more convenient than looking around the filesystem for the log files, which could be + in different locations on clusters that use Cloudera Manager or not. + </p> + </conbody> + </concept> + + <concept id="webui_impalad_memz"> + + <title>Memz Page</title> + + <conbody> + + <p> + By default, the <uicontrol>memz</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/memz</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/memz</codeph> (secure cluster). + </p> + + <p> + This page displays summary and detailed information about memory usage by the <cmdname>impalad</cmdname> + daemon. You can see the memory limit in effect for the node, and how much of that memory Impala is + currently using. 
+ </p> + </conbody> + </concept> + + <concept id="webui_impalad_metrics"> + + <title>Metrics Page</title> + + <conbody> + + <p> + By default, the <uicontrol>metrics</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/metrics</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/metrics</codeph> (secure cluster). + </p> + + <p> + This page displays the current set of metrics: counters and flags representing various aspects of + <cmdname>impalad</cmdname> internal operation. For the meanings of these metrics, see + <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_metrics_impala.html" scope="external" format="html">Impala + Metrics</xref> in the Cloudera Manager documentation. + </p> + </conbody> + </concept> + + <concept id="webui_impalad_queries"> + + <title>Queries Page</title> + + <conbody> + + <p> + By default, the <uicontrol>queries</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/queries</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/queries</codeph> (secure cluster). + </p> + + <p> + This page lists all currently running queries, plus any completed queries whose details still reside in + memory. The queries are listed in reverse chronological order, with the most recent at the top. (You can + control the amount of memory devoted to completed queries by specifying the + <codeph>--query_log_size</codeph> startup option for <cmdname>impalad</cmdname>.) + </p> + + <p> + On this page, you can see at a glance how many SQL statements are failing (<codeph>State</codeph> value + of <codeph>EXCEPTION</codeph>), how large the result sets are (<codeph># rows fetched</codeph>), and how + long each statement took (<codeph>Start Time</codeph> and <codeph>End Time</codeph>). 
+ </p> + + <p> + Each query has an associated link that displays the detailed query profile, which you can examine to + understand the performance characteristics of that query. See + <xref href="impala_explain_plan.xml#perf_profile"/> for details. + </p> + </conbody> + </concept> + + <concept id="webui_impalad_sessions"> + + <title>Sessions Page</title> + + <conbody> + + <p> + By default, the <uicontrol>sessions</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/sessions</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/sessions</codeph> (secure cluster). + </p> + + <p> + This page displays information about the sessions currently connected to this <cmdname>impalad</cmdname> + instance. For example, sessions could include connections from the <cmdname>impala-shell</cmdname> + command, JDBC or ODBC applications, or the Impala Query UI in the Hue web interface. + </p> + </conbody> + </concept> + + <concept id="webui_impalad_threadz"> + + <title>Threadz Page</title> + + <conbody> + + <p> + By default, the <uicontrol>threadz</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/threadz</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/threadz</codeph> (secure cluster). + </p> + + <p> + This page displays information about the threads used by this instance of <cmdname>impalad</cmdname>, + and shows which categories they are grouped into. Making use of this information requires substantial + knowledge about Impala internals. 
+ </p> + </conbody> + </concept> + + <concept id="webui_impalad_varz"> + + <title>Varz Page</title> + + <conbody> + + <p> + By default, the <uicontrol>varz</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25000/varz</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25000/varz</codeph> (secure cluster). + </p> + + <p> + This page shows the configuration settings in effect when this instance of <cmdname>impalad</cmdname> + communicates with other Hadoop components such as HDFS and YARN. These settings are collected from a set + of configuration files; Impala might not actually make use of all settings. + </p> + + <p> + The bottom of this page also lists all the command-line settings in effect for this instance of + <cmdname>impalad</cmdname>. See <xref href="impala_config_options.xml#config_options"/> for information + about modifying these values. + </p> + </conbody> + </concept> + </concept> + + <concept audience="Cloudera" id="webui_statestored"> + + <title>Debug Web UI for statestored</title> + + <conbody> + + <p></p> + </conbody> + + <concept audience="Cloudera" id="webui_statestored_disabling"> + + <title>Turning off the Web UI for statestored</title> + + <conbody> + + <p></p> + </conbody> + </concept> + + <concept id="webui_statestored_main"> + + <title>Main Page</title> + + <conbody> + + <p> + By default, the main page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25010/</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25010/</codeph> (secure cluster). + </p> + + <p> + This page lists the version of the <cmdname>statestored</cmdname> daemon, plus basic hardware and software + information about the corresponding host, such as information about the CPU, memory, disks, and operating + system version. 
+ </p> + </conbody> + </concept> + + <concept id="webui_statestored_logs"> + + <title>Logs Page</title> + + <conbody> + + <p> + By default, the <uicontrol>logs</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25010/logs</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25010/logs</codeph> (secure cluster). + </p> + + <p> + This page shows the last portion of the <filepath>statestored.INFO</filepath> log file, the most detailed of + the info, warning, and error logs for the <cmdname>statestored</cmdname> daemon. You can refer here to see + the details of the most recent operations, whether the operations succeeded or encountered errors. This + central page can be more convenient than looking around the filesystem for the log files, which could be + in different locations on clusters that use Cloudera Manager or not. + </p> + </conbody> + </concept> + + <concept id="webui_statestored_memz"> + + <title>Memz Page</title> + + <conbody> + + <p> + By default, the <uicontrol>memz</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25010/memz</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25010/memz</codeph> (secure cluster). + </p> + + <p> + This page displays summary and detailed information about memory usage by the <cmdname>statestored</cmdname> + daemon. You can see the memory limit in effect for the node, and how much of that memory Impala is + currently using. + </p> + </conbody> + </concept> + + <concept id="webui_statestored_metrics"> + + <title>Metrics Page</title> + + <conbody> + + <p> + By default, the <uicontrol>metrics</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25010/metrics</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25010/metrics</codeph> (secure cluster). 
+ </p> + + <p> + This page displays the current set of metrics: counters and flags representing various aspects of + <cmdname>statestored</cmdname> internal operation. For the meanings of these metrics, see + <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_metrics_impala.html" scope="external" format="html">Impala + Metrics</xref> in the Cloudera Manager documentation. + </p> + </conbody> + </concept> + + <concept id="webui_statestored_subscribers"> + + <title>Subscribers Page</title> + + <conbody> + + <p> + By default, the <uicontrol>subscribers</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25010/subscribers</codeph> (non-secure cluster) + or <codeph>https://<varname>impala-server-hostname</varname>:25010/subscribers</codeph> (secure cluster). + </p> + + <p> + This page displays information about... + </p> + </conbody> + </concept> + + <concept id="webui_statestored_threadz"> + + <title>Threadz Page</title> + + <conbody> + + <p> + By default, the <uicontrol>threadz</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25010/threadz</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25010/threadz</codeph> (secure cluster). + </p> + + <p> + This page displays information about the threads used by this instance of <cmdname>statestored</cmdname>, + and shows which categories they are grouped into. Making use of this information requires substantial + knowledge about Impala internals. + </p> + </conbody> + </concept> + + <concept id="webui_statestored_topics"> + + <title>Topics Page</title> + + <conbody> + + <p> + By default, the <uicontrol>topics</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25010/topics</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25010/topics</codeph> (secure cluster). 
+ </p> + + <p> + This page displays information about... + </p> + </conbody> + </concept> + + <concept id="webui_statestored_varz"> + + <title>Varz Page</title> + + <conbody> + + <p> + By default, the <uicontrol>varz</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25010/varz</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25010/varz</codeph> (secure cluster). + </p> + + <p> + This page shows the configuration settings in effect when this instance of <cmdname>statestored</cmdname> + communicates with other Hadoop components such as HDFS and YARN. These settings are collected from a set + of configuration files; Impala might not actually make use of all settings. + </p> + + <p> + The bottom of this page also lists all the command-line settings in effect for this instance of + <cmdname>statestored</cmdname>. See <xref href="impala_config_options.xml#config_options"/> for information + about modifying these values. + </p> + </conbody> + </concept> + </concept> + + <concept audience="Cloudera" id="webui_catalogd"> + + <title>Debug Web UI for catalogd</title> + + <conbody> + + <p></p> + </conbody> + + <concept audience="Cloudera" id="webui_catalogd_disabling"> + + <title>Turning off the Web UI for catalogd</title> + + <conbody> + + <p></p> + </conbody> + </concept> + + <concept id="webui_catalogd_main"> + + <title>Main Page</title> + + <conbody> + + <p> + By default, the main page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25020/</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25020/</codeph> (secure cluster). + </p> + + <p> + This page lists the version of the <cmdname>catalogd</cmdname> daemon, plus basic hardware and software + information about the corresponding host, such as information about the CPU, memory, disks, and operating + system version. 
+ </p> + </conbody> + </concept> + + <concept id="webui_catalogd_catalog"> + + <title>Catalog Page</title> + + <conbody> + + <p> + By default, the <uicontrol>catalog</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25020/catalog</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25020/catalog</codeph> (secure cluster). + </p> + + <p> + This page displays a list of databases and associated tables recognized by this instance of + <cmdname>catalogd</cmdname>. You can use this page to locate which database a table is in, check the + exact spelling of a database or table name, look for identical table names in multiple databases, and so + on. + </p> + </conbody> + </concept> + + <concept id="webui_catalogd_logs"> + + <title>Logs Page</title> + + <conbody> + + <p> + By default, the <uicontrol>logs</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25020/logs</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25020/logs</codeph> (secure cluster). + </p> + + <p> + This page shows the last portion of the <filepath>catalogd.INFO</filepath> log file, the most detailed of + the info, warning, and error logs for the <cmdname>catalogd</cmdname> daemon. You can refer to this page to see + the details of the most recent operations, whether the operations succeeded or encountered errors. This + central page can be more convenient than looking around the filesystem for the log files, which could be + in different locations depending on whether or not the cluster uses Cloudera Manager. 
+ </p> + </conbody> + </concept> + + <concept id="webui_catalogd_metrics"> + + <title>Metrics Page</title> + + <conbody> + + <p> + By default, the <uicontrol>metrics</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25020/metrics</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25020/metrics</codeph> (secure cluster). + </p> + + <p> + This page displays the current set of metrics: counters and flags representing various aspects of + <cmdname>catalogd</cmdname> internal operation. For the meanings of these metrics, see + <xref href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_metrics_impala.html" scope="external" format="html">Impala + Metrics</xref> in the Cloudera Manager documentation. + </p> + </conbody> + </concept> + + <concept id="webui_catalogd_varz"> + + <title>Varz Page</title> + + <conbody> + + <p> + By default, the <uicontrol>varz</uicontrol> page of the debug web UI is at + <codeph>http://<varname>impala-server-hostname</varname>:25020/varz</codeph> (non-secure cluster) or + <codeph>https://<varname>impala-server-hostname</varname>:25020/varz</codeph> (secure cluster). + </p> + + <p> + This page shows the configuration settings in effect when this instance of <cmdname>catalogd</cmdname> + communicates with other Hadoop components such as HDFS and YARN. These settings are collected from a set + of configuration files; Impala might not actually make use of all settings. + </p> + + <p> + The bottom of this page also lists all the command-line settings in effect for this instance of + <cmdname>catalogd</cmdname>. See <xref href="impala_config_options.xml#config_options"/> for information + about modifying these values. + </p> + </conbody> + </concept> + </concept> +</concept>
