5.10 versions.

jrussell Tue, 01 Nov 2016 16:14:43 -0700

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_explain.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_explain.xml b/docs/topics/impala_explain.xml
index c9e8846..81cc17b 100644
--- a/docs/topics/impala_explain.xml
+++ b/docs/topics/impala_explain.xml
@@ -3,7 +3,7 @@
 <concept id="explain">
 
   <title>EXPLAIN Statement</title>
-  <titlealts><navtitle>EXPLAIN</navtitle></titlealts>
+  <titlealts audience="PDF"><navtitle>EXPLAIN</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -13,6 +13,9 @@
       <data name="Category" value="Planning"/>
       <data name="Category" value="Performance"/>
       <data name="Category" value="Troubleshooting"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
     </metadata>
   </prolog>
 
@@ -96,11 +99,12 @@
 
     <p rev="1.2">
       When extended <codeph>EXPLAIN</codeph> output is enabled, 
<codeph>EXPLAIN</codeph> statements print
-      information about estimated memory requirements, minimum number of 
virtual cores, and so on that you can use
-      to fine-tune the resource management options explained in
-      <xref href="impala_resource_management.xml#rm_options"/>. (The estimated 
memory requirements are
-      intentionally on the high side, to allow a margin for error, to avoid 
cancelling a query unnecessarily if you
-      set the <codeph>MEM_LIMIT</codeph> option to the estimated memory 
figure.)
+      information about estimated memory requirements, minimum number of 
virtual cores, and so on.
+      <!--
+      that you can use to fine-tune the resource management options explained 
in <xref href="impala_resource_management.xml#rm_options"/>.
+      (The estimated memory requirements are intentionally on the high side, 
to allow a margin for error,
+      to avoid cancelling a query unnecessarily if you set the 
<codeph>MEM_LIMIT</codeph> option to the estimated memory figure.)
+      -->
     </p>
 
     <p>
@@ -145,9 +149,9 @@
       statement has additional information to use in deciding how to optimize 
the distributed query.
     </p>
 
-    <draft-comment translate="no">
-Re-run these examples with more substantial tables populated with data.
-</draft-comment>
+    <!-- To do:
+      Re-run these examples with more substantial tables populated with data.
+    -->
 
 <codeblock rev="1.2">[localhost:21000] &gt; set explain_level=extended;
 EXPLAIN_LEVEL set to extended


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_explain_level.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_explain_level.xml 
b/docs/topics/impala_explain_level.xml
index f54e8a8..e0c30d2 100644
--- a/docs/topics/impala_explain_level.xml
+++ b/docs/topics/impala_explain_level.xml
@@ -3,6 +3,7 @@
 <concept rev="1.2" id="explain_level">
 
   <title>EXPLAIN_LEVEL Query Option</title>
+  <titlealts audience="PDF"><navtitle>EXPLAIN_LEVEL</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -11,6 +12,9 @@
       <data name="Category" value="Querying"/>
       <data name="Category" value="Performance"/>
       <data name="Category" value="Reports"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
     </metadata>
   </prolog>
 
@@ -126,80 +130,84 @@
 <codeblock>[localhost:21000] &gt; create table t1 (x int, s string);
 [localhost:21000] &gt; set explain_level=1;
 [localhost:21000] &gt; explain select count(*) from t1;
-+------------------------------------------------------------------------------------+
-| Explain String                                                               
      |
-+------------------------------------------------------------------------------------+
-| Estimated Per-Host Requirements: Memory=10.00MB VCores=1                     
      |
-| WARNING: The following tables are missing relevant table and/or column 
statistics. |
-| explain_plan.t1                                                              
      |
-|                                                                              
      |
-| 03:AGGREGATE [MERGE FINALIZE]                                                
      |
-| |  output: sum(count(*))                                                     
      |
-| |                                                                            
      |
-| 02:EXCHANGE [PARTITION=UNPARTITIONED]                                        
      |
-| |                                                                            
      |
-| 01:AGGREGATE                                                                 
      |
-| |  output: count(*)                                                          
      |
-| |                                                                            
      |
-| 00:SCAN HDFS [explain_plan.t1]                                               
      |
-|    partitions=1/1 size=0B                                                    
      |
-+------------------------------------------------------------------------------------+
++------------------------------------------------------------------------+
+| Explain String                                                         |
++------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=10.00MB VCores=1               |
+| WARNING: The following tables are missing relevant table and/or column |
+|   statistics.                                                          |
+| explain_plan.t1                                                        |
+|                                                                        |
+| 03:AGGREGATE [MERGE FINALIZE]                                          |
+| |  output: sum(count(*))                                               |
+| |                                                                      |
+| 02:EXCHANGE [PARTITION=UNPARTITIONED]                                  |
+| |                                                                      |
+| 01:AGGREGATE                                                           |
+| |  output: count(*)                                                    |
+| |                                                                      |
+| 00:SCAN HDFS [explain_plan.t1]                                         |
+|    partitions=1/1 size=0B                                              |
++------------------------------------------------------------------------+
 [localhost:21000] &gt; explain select * from t1;
-+------------------------------------------------------------------------------------+
-| Explain String                                                               
      |
-+------------------------------------------------------------------------------------+
-| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0       
      |
-| WARNING: The following tables are missing relevant table and/or column 
statistics. |
-| explain_plan.t1                                                              
      |
-|                                                                              
      |
-| 01:EXCHANGE [PARTITION=UNPARTITIONED]                                        
      |
-| |                                                                            
      |
-| 00:SCAN HDFS [explain_plan.t1]                                               
      |
-|    partitions=1/1 size=0B                                                    
      |
-+------------------------------------------------------------------------------------+
++------------------------------------------------------------------------+
+| Explain String                                                         |
++------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+| WARNING: The following tables are missing relevant table and/or column |
+|   statistics.                                                          |
+| explain_plan.t1                                                        |
+|                                                                        |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED]                                  |
+| |                                                                      |
+| 00:SCAN HDFS [explain_plan.t1]                                         |
+|    partitions=1/1 size=0B                                              |
++------------------------------------------------------------------------+
 [localhost:21000] &gt; set explain_level=2;
 [localhost:21000] &gt; explain select * from t1;
-+------------------------------------------------------------------------------------+
-| Explain String                                                               
      |
-+------------------------------------------------------------------------------------+
-| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0       
      |
-| WARNING: The following tables are missing relevant table and/or column 
statistics. |
-| explain_plan.t1                                                              
      |
-|                                                                              
      |
-| 01:EXCHANGE [PARTITION=UNPARTITIONED]                                        
      |
-| |  hosts=0 per-host-mem=unavailable                                          
      |
-| |  tuple-ids=0 row-size=19B cardinality=unavailable                          
      |
-| |                                                                            
      |
-| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM]                             
      |
-|    partitions=1/1 size=0B                                                    
      |
-|    table stats: unavailable                                                  
      |
-|    column stats: unavailable                                                 
      |
-|    hosts=0 per-host-mem=0B                                                   
      |
-|    tuple-ids=0 row-size=19B cardinality=unavailable                          
      |
-+------------------------------------------------------------------------------------+
++------------------------------------------------------------------------+
+| Explain String                                                         |
++------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+| WARNING: The following tables are missing relevant table and/or column |
+|   statistics.                                                          |
+| explain_plan.t1                                                        |
+|                                                                        |
+| 01:EXCHANGE [PARTITION=UNPARTITIONED]                                  |
+| |  hosts=0 per-host-mem=unavailable                                    |
+| |  tuple-ids=0 row-size=19B cardinality=unavailable                    |
+| |                                                                      |
+| 00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM]                       |
+|    partitions=1/1 size=0B                                              |
+|    table stats: unavailable                                            |
+|    column stats: unavailable                                           |
+|    hosts=0 per-host-mem=0B                                             |
+|    tuple-ids=0 row-size=19B cardinality=unavailable                    |
++------------------------------------------------------------------------+
 [localhost:21000] &gt; set explain_level=3;
 [localhost:21000] &gt; explain select * from t1;
-+------------------------------------------------------------------------------------+
-| Explain String                                                               
      |
-+------------------------------------------------------------------------------------+
-| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0       
      |
-<b>| WARNING: The following tables are missing relevant table and/or column 
statistics. |</b>
-<b>| explain_plan.t1                                                           
         |</b>
-|                                                                              
      |
-| F01:PLAN FRAGMENT [PARTITION=UNPARTITIONED]                                  
      |
-|   01:EXCHANGE [PARTITION=UNPARTITIONED]                                      
      |
-|      hosts=0 per-host-mem=unavailable                                        
      |
-|      tuple-ids=0 row-size=19B cardinality=unavailable                        
      |
-|                                                                              
      |
-| F00:PLAN FRAGMENT [PARTITION=RANDOM]                                         
      |
-|   DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, PARTITION=UNPARTITIONED]       
      |
-|   00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM]                           
      |
-|      partitions=1/1 size=0B                                                  
      |
-<b>|      table stats: unavailable                                             
         |</b>
-<b>|      column stats: unavailable                                            
         |</b>
-|      hosts=0 per-host-mem=0B                                                 
      |
-|      tuple-ids=0 row-size=19B cardinality=unavailable                        
      |
-+------------------------------------------------------------------------------------+
++------------------------------------------------------------------------+
+| Explain String                                                         |
++------------------------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=-9223372036854775808B VCores=0 |
+<b>| WARNING: The following tables are missing relevant table and/or column |</b>
+<b>|   statistics.                                                          |</b>
+<b>| explain_plan.t1                                                        |</b>
+|                                                                        |
+| F01:PLAN FRAGMENT [PARTITION=UNPARTITIONED]                            |
+|   01:EXCHANGE [PARTITION=UNPARTITIONED]                                |
+|      hosts=0 per-host-mem=unavailable                                  |
+|      tuple-ids=0 row-size=19B cardinality=unavailable                  |
+|                                                                        |
+| F00:PLAN FRAGMENT [PARTITION=RANDOM]                                   |
+|   DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, PARTITION=UNPARTITIONED] |
+|   00:SCAN HDFS [explain_plan.t1, PARTITION=RANDOM]                     |
+|      partitions=1/1 size=0B                                            |
+<b>|      table stats: unavailable                                          |</b>
+<b>|      column stats: unavailable                                         |</b>
+|      hosts=0 per-host-mem=0B                                           |
+|      tuple-ids=0 row-size=19B cardinality=unavailable                  |
++------------------------------------------------------------------------+
 </codeblock>
 
     <p>
@@ -246,61 +254,63 @@
 
 <codeblock>[localhost:21000] &gt; set explain_level=1;
 [localhost:21000] &gt; explain select one.*, two.*, three.* from t1 one, t1 
two, t1 three where one.x = two.x and two.x = three.x;
-+------------------------------------------------------------------------------------+
-| Explain String                                                               
      |
-+------------------------------------------------------------------------------------+
-| Estimated Per-Host Requirements: Memory=4.00GB VCores=3                      
      |
-|                                                                              
      |
-| 07:EXCHANGE [PARTITION=UNPARTITIONED]                                        
      |
-| |                                                                            
      |
-<b>| 04:HASH JOIN [INNER JOIN, BROADCAST]                                      
         |</b>
-| |  hash predicates: two.x = three.x                                          
      |
-| |                                                                            
      |
-<b>| |--06:EXCHANGE [BROADCAST]                                                
         |</b>
-| |  |                                                                         
      |
-| |  02:SCAN HDFS [explain_plan.t1 three]                                      
      |
-| |     partitions=1/1 size=0B                                                 
      |
-| |                                                                            
      |
-<b>| 03:HASH JOIN [INNER JOIN, BROADCAST]                                      
         |</b>
-| |  hash predicates: one.x = two.x                                            
      |
-| |                                                                            
      |
-<b>| |--05:EXCHANGE [BROADCAST]                                                
         |</b>
-| |  |                                                                         
      |
-| |  01:SCAN HDFS [explain_plan.t1 two]                                        
      |
-| |     partitions=1/1 size=0B                                                 
      |
-| |                                                                            
      |
-| 00:SCAN HDFS [explain_plan.t1 one]                                           
      |
-|    partitions=1/1 size=0B                                                    
      |
-+------------------------------------------------------------------------------------+
-[localhost:21000] &gt; explain select one.*, two.*, three.* from t1 one join 
[shuffle] t1 two join t1 three where one.x = two.x and two.x = three.x;
-+------------------------------------------------------------------------------------+
-| Explain String                                                               
      |
-+------------------------------------------------------------------------------------+
-| Estimated Per-Host Requirements: Memory=4.00GB VCores=3                      
      |
-|                                                                              
      |
-| 08:EXCHANGE [PARTITION=UNPARTITIONED]                                        
      |
-| |                                                                            
      |
-<b>| 04:HASH JOIN [INNER JOIN, BROADCAST]                                      
         |</b>
-| |  hash predicates: two.x = three.x                                          
      |
-| |                                                                            
      |
-<b>| |--07:EXCHANGE [BROADCAST]                                                
         |</b>
-| |  |                                                                         
      |
-| |  02:SCAN HDFS [explain_plan.t1 three]                                      
      |
-| |     partitions=1/1 size=0B                                                 
      |
-| |                                                                            
      |
-<b>| 03:HASH JOIN [INNER JOIN, PARTITIONED]                                    
         |</b>
-| |  hash predicates: one.x = two.x                                            
      |
-| |                                                                            
      |
-<b>| |--06:EXCHANGE [PARTITION=HASH(two.x)]                                    
         |</b>
-| |  |                                                                         
      |
-| |  01:SCAN HDFS [explain_plan.t1 two]                                        
      |
-| |     partitions=1/1 size=0B                                                 
      |
-| |                                                                            
      |
-<b>| 05:EXCHANGE [PARTITION=HASH(one.x)]                                       
         |</b>
-| |                                                                            
      |
-| 00:SCAN HDFS [explain_plan.t1 one]                                           
      |
-|    partitions=1/1 size=0B                                                    
      |
-+------------------------------------------------------------------------------------+
++---------------------------------------------------------+
+| Explain String                                          |
++---------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 |
+|                                                         |
+| 07:EXCHANGE [PARTITION=UNPARTITIONED]                   |
+| |                                                       |
+<b>| 04:HASH JOIN [INNER JOIN, BROADCAST]                    |</b>
+| |  hash predicates: two.x = three.x                     |
+| |                                                       |
+<b>| |--06:EXCHANGE [BROADCAST]                              |</b>
+| |  |                                                    |
+| |  02:SCAN HDFS [explain_plan.t1 three]                 |
+| |     partitions=1/1 size=0B                            |
+| |                                                       |
+<b>| 03:HASH JOIN [INNER JOIN, BROADCAST]                    |</b>
+| |  hash predicates: one.x = two.x                       |
+| |                                                       |
+<b>| |--05:EXCHANGE [BROADCAST]                              |</b>
+| |  |                                                    |
+| |  01:SCAN HDFS [explain_plan.t1 two]                   |
+| |     partitions=1/1 size=0B                            |
+| |                                                       |
+| 00:SCAN HDFS [explain_plan.t1 one]                      |
+|    partitions=1/1 size=0B                               |
++---------------------------------------------------------+
+[localhost:21000] &gt; explain select one.*, two.*, three.*
+                  &gt; from t1 one join [shuffle] t1 two join t1 three
+                  &gt; where one.x = two.x and two.x = three.x;
++---------------------------------------------------------+
+| Explain String                                          |
++---------------------------------------------------------+
+| Estimated Per-Host Requirements: Memory=4.00GB VCores=3 |
+|                                                         |
+| 08:EXCHANGE [PARTITION=UNPARTITIONED]                   |
+| |                                                       |
+<b>| 04:HASH JOIN [INNER JOIN, BROADCAST]                    |</b>
+| |  hash predicates: two.x = three.x                     |
+| |                                                       |
+<b>| |--07:EXCHANGE [BROADCAST]                              |</b>
+| |  |                                                    |
+| |  02:SCAN HDFS [explain_plan.t1 three]                 |
+| |     partitions=1/1 size=0B                            |
+| |                                                       |
+<b>| 03:HASH JOIN [INNER JOIN, PARTITIONED]                  |</b>
+| |  hash predicates: one.x = two.x                       |
+| |                                                       |
+<b>| |--06:EXCHANGE [PARTITION=HASH(two.x)]                  |</b>
+| |  |                                                    |
+| |  01:SCAN HDFS [explain_plan.t1 two]                   |
+| |     partitions=1/1 size=0B                            |
+| |                                                       |
+<b>| 05:EXCHANGE [PARTITION=HASH(one.x)]                     |</b>
+| |                                                       |
+| 00:SCAN HDFS [explain_plan.t1 one]                      |
+|    partitions=1/1 size=0B                               |
++---------------------------------------------------------+
 </codeblock>
 
     <p>
@@ -314,7 +324,9 @@
     </p>
 
 <codeblock>[localhost:21000] &gt; set explain_level=0;
-[localhost:21000] &gt; explain select one.*, two.*, three.* from t1 one join 
[shuffle] t1 two join t1 three where one.x = two.x and two.x = three.x;
+[localhost:21000] &gt; explain select one.*, two.*, three.*
+                  &gt; from t1 one join [shuffle] t1 two join t1 three
+                  &gt; where one.x = two.x and two.x = three.x;
 +---------------------------------------------------------+
 | Explain String                                          |
 +---------------------------------------------------------+

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_explain_plan.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_explain_plan.xml 
b/docs/topics/impala_explain_plan.xml
index 44c8b74..4fd721f 100644
--- a/docs/topics/impala_explain_plan.xml
+++ b/docs/topics/impala_explain_plan.xml
@@ -4,7 +4,19 @@
 
   <title>Understanding Impala Query Performance - EXPLAIN Plans and Query 
Profiles</title>
   <titlealts audience="PDF"><navtitle>EXPLAIN Plans and Query 
Profiles</navtitle></titlealts>
-  
+  <prolog>
+    <metadata>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Troubleshooting"/>
+      <data name="Category" value="Reports"/>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
 
   <conbody>
 
@@ -14,7 +26,543 @@
       actually running the query itself.
     </p>
 
-    
+    <p rev="1.4.0">
+      For an overview of the physical performance characteristics for a query, 
issue the <codeph>SUMMARY</codeph>
+      statement in <cmdname>impala-shell</cmdname> immediately after executing 
a query. This condensed information
+      shows which phases of execution took the most time, and how the 
estimates for memory usage and number of rows
+      at each phase compare to the actual values.
+    </p>
+
+    <p>
+      To understand the detailed performance characteristics for a query, 
issue the <codeph>PROFILE</codeph>
+      statement in <cmdname>impala-shell</cmdname> immediately after executing 
a query. This low-level information
+      includes physical details about memory, CPU, I/O, and network usage, and 
thus is only available after the
+      query is actually run.
+    </p>
+
+    <p outputclass="toc inpage"/>
+
+    <p>
+      Also, see <xref href="impala_hbase.xml#hbase_performance"/>
+      and <xref href="impala_s3.xml#s3_performance"/>
+      for examples of interpreting
+      <codeph>EXPLAIN</codeph> plans for queries against HBase tables
+      <ph rev="2.2.0">and data stored in the Amazon Simple Storage Service (S3)</ph>.
+    </p>
+  </conbody>
+
+  <concept id="perf_explain">
+
+    <title>Using the EXPLAIN Plan for Performance Tuning</title>
+
+    <conbody>
+
+      <p>
+        The <codeph><xref 
href="impala_explain.xml#explain">EXPLAIN</xref></codeph> statement gives you 
an outline
+        of the logical steps that a query will perform, such as how the work 
will be distributed among the nodes
+        and how intermediate results will be combined to produce the final 
result set. You can see these details
+        before actually running the query. You can use this information to 
check that the query will not operate in
+        some very unexpected or inefficient way.
+      </p>
+
+<!-- Turn into a conref in ciiu_langref too. Relocate to common.xml. -->
+
+<codeblock conref="impala_explain.xml#explain/explain_plan_simple"/>
+
+      <p conref="../shared/impala_common.xml#common/explain_interpret"/>
+
+      <p>
+        The <codeph>EXPLAIN</codeph> plan is also printed at the beginning of 
the query profile report described in
+        <xref href="#perf_profile"/>, for convenience in examining both the 
logical and physical aspects of the
+        query side-by-side.
+      </p>
+
+      <p rev="1.2">
+        The amount of detail displayed in the <codeph>EXPLAIN</codeph> output 
is controlled by the
+        <xref 
href="impala_explain_level.xml#explain_level">EXPLAIN_LEVEL</xref> query 
option. You typically
+        increase this setting from <codeph>normal</codeph> to 
<codeph>verbose</codeph> (or from <codeph>0</codeph>
+        to <codeph>1</codeph>) when double-checking the presence of table and column statistics during performance
+        tuning, or when estimating query resource usage in conjunction with 
the resource management features in CDH
+        5.
+      </p>
+
+      <!-- To do:
+        This is a good place to have a few examples.
+      -->
     </conbody>
   </concept>
 
+  <concept id="perf_summary">
+
+    <title>Using the SUMMARY Report for Performance Tuning</title>
+
+    <conbody>
+
+      <p>
+        The <codeph><xref 
href="impala_shell_commands.xml#shell_commands">SUMMARY</xref></codeph> command 
within
+        the <cmdname>impala-shell</cmdname> interpreter gives you an 
easy-to-digest overview of the timings for the
+        different phases of execution for a query. Like the <codeph>EXPLAIN</codeph> plan, it makes
+        potential performance bottlenecks easy to see. Like the <codeph>PROFILE</codeph> output, it is available after the
+        query is run and so displays actual timing numbers.
+      </p>
+
+      <p>
+        The <codeph>SUMMARY</codeph> report is also printed at the beginning 
of the query profile report described
+        in <xref href="#perf_profile"/>, for convenience in examining 
high-level and low-level aspects of the query
+        side-by-side.
+      </p>
+
+      <p>
+        For example, here is a query involving an aggregate function, on a 
single-node VM. The different stages of
+        the query and their timings are shown (rolled up for all nodes), along 
with estimated and actual values
+        used in planning the query. In this case, the <codeph>AVG()</codeph> 
function is computed for a subset of
+        data on each node (stage 01) and then the aggregated results from all 
nodes are combined at the end (stage
+        03). You can see which stages took the most time, and whether any 
estimates were substantially different
+        than the actual data distribution. (When examining the time values, be 
sure to consider the suffixes such
+        as <codeph>us</codeph> for microseconds and <codeph>ms</codeph> for 
milliseconds, rather than just looking
+        for the largest numbers.)
+      </p>
+
+<codeblock>[localhost:21000] &gt; select avg(ss_sales_price) from store_sales 
where ss_coupon_amt = 0;
++---------------------+
+| avg(ss_sales_price) |
++---------------------+
+| 37.80770926328327   |
++---------------------+
+[localhost:21000] &gt; summary;
++--------------+--------+----------+----------+-------+------------+----------+---------------+-----------------+
+| Operator     | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem 
| Est. Peak Mem | Detail          |
++--------------+--------+----------+----------+-------+------------+----------+---------------+-----------------+
+| 03:AGGREGATE | 1      | 1.03ms   | 1.03ms   | 1     | 1          | 48.00 KB 
| -1 B          | MERGE FINALIZE  |
+| 02:EXCHANGE  | 1      | 0ns      | 0ns      | 1     | 1          | 0 B      
| -1 B          | UNPARTITIONED   |
+| 01:AGGREGATE | 1      | 30.79ms  | 30.79ms  | 1     | 1          | 80.00 KB 
| 10.00 MB      |                 |
+| 00:SCAN HDFS | 1      | 5.45s    | 5.45s    | 2.21M | -1         | 64.05 MB 
| 432.00 MB     | tpc.store_sales |
++--------------+--------+----------+----------+-------+------------+----------+---------------+-----------------+
+</codeblock>
+
+      <p>
+        Notice how the longest initial phase of the query is measured in 
seconds (s), while later phases working on
+        smaller intermediate results are measured in milliseconds (ms) or even 
nanoseconds (ns).
+      </p>
+
+      <p>
+        Here is an example from a more complicated query, as it would appear 
in the <codeph>PROFILE</codeph>
+        output:
+      </p>
+
+<!-- This example taken from: 
https://github.com/cloudera/Impala/commit/af85d3b518089b8840ddea4356947e40d1aca9bd
 -->
+
+<codeblock>Operator              #Hosts   Avg Time   Max Time    #Rows  Est. 
#Rows  Peak Mem  Est. Peak Mem  Detail
+------------------------------------------------------------------------------------------------------------------------
+09:MERGING-EXCHANGE        1   79.738us   79.738us        5           5        
 0        -1.00 B  UNPARTITIONED
+05:TOP-N                   3   84.693us   88.810us        5           5  12.00 
KB       120.00 B
+04:AGGREGATE               3    5.263ms    6.432ms        5           5  44.00 
KB       10.00 MB  MERGE FINALIZE
+08:AGGREGATE               3   16.659ms   27.444ms   52.52K     600.12K   3.20 
MB       15.11 MB  MERGE
+07:EXCHANGE                3    2.644ms      5.1ms   52.52K     600.12K        
 0              0  HASH(o_orderpriority)
+03:AGGREGATE               3  342.913ms  966.291ms   52.52K     600.12K  10.80 
MB       15.11 MB
+02:HASH JOIN               3    2s165ms    2s171ms  144.87K     600.12K  13.63 
MB      941.01 KB  INNER JOIN, BROADCAST
+|--06:EXCHANGE             3    8.296ms    8.692ms   57.22K      15.00K        
 0              0  BROADCAST
+|  01:SCAN HDFS            2    1s412ms    1s978ms   57.22K      15.00K  24.21 
MB      176.00 MB  tpch.orders o
+00:SCAN HDFS               3    8s032ms    8s558ms    3.79M     600.12K  32.29 
MB      264.00 MB  tpch.lineitem l
+</codeblock>
+    </conbody>
+  </concept>
+
+  <concept id="perf_profile">
+
+    <title>Using the Query Profile for Performance Tuning</title>
+
+    <conbody>
+
+      <p>
+        The <codeph>PROFILE</codeph> statement, available in the 
<cmdname>impala-shell</cmdname> interpreter,
+        produces a detailed low-level report showing how the most recent query 
was executed. Unlike the
+        <codeph>EXPLAIN</codeph> plan described in <xref 
href="#perf_explain"/>, this information is only available
+        after the query has finished. It shows physical details such as the 
number of bytes read, maximum memory
+        usage, and so on for each node. You can use this information to 
determine if the query is I/O-bound or
+        CPU-bound, whether some network condition is imposing a bottleneck, 
whether a slowdown is affecting some
+        nodes but not others, and to check that recommended configuration 
settings such as short-circuit local
+        reads are in effect.
+      </p>
+
+      <p rev="CDH-29157">
+        By default, time values in the profile output reflect the wall-clock 
time taken by an operation.
+        For values denoting system time or user time, the kind of time is 
reflected in the metric
+        name, such as <codeph>ScannerThreadsSysTime</codeph> or 
<codeph>ScannerThreadsUserTime</codeph>.
+        For example, a multi-threaded I/O operation might show a small figure 
for wall-clock time,
+        while the corresponding system time is larger, representing the sum of 
the CPU time taken by each thread.
+        Or a wall-clock time figure might be larger because it counts time 
spent waiting, while
+        the corresponding system and user time figures only measure the time 
while the operation
+        is actively using CPU cycles.
+      </p>
+
+      <p>
+        The <xref 
href="impala_explain_plan.xml#perf_explain"><codeph>EXPLAIN</codeph> 
plan</xref> is also printed
+        at the beginning of the query profile report, for convenience in 
examining both the logical and physical
+        aspects of the query side-by-side. The
+        <xref 
href="impala_explain_level.xml#explain_level">EXPLAIN_LEVEL</xref> query option 
also controls the
+        verbosity of the <codeph>EXPLAIN</codeph> output printed by the 
<codeph>PROFILE</codeph> command.
+      </p>
+
+      <!-- To do:
+        This is a good place to have a few more examples.
+      -->
+
+      <p>
+        Here is an example of a query profile, from a relatively 
straightforward query run on a single-node
+        pseudo-distributed cluster to keep the output brief.
+      </p>
+
+<codeblock>[localhost:21000] &gt; profile;
+Query Runtime Profile:
+Query (id=6540a03d4bee0691:4963d6269b210ebd):
+  Summary:
+    Session ID: ea4a197f1c7bf858:c74e66f72e3a33ba
+    Session Type: BEESWAX
+    Start Time: 2013-12-02 17:10:30.263067000
+    End Time: 2013-12-02 17:10:50.932044000
+    Query Type: QUERY
+    Query State: FINISHED
+    Query Status: OK
+    Impala Version: impalad version 1.2.1 RELEASE (build 
edb5af1bcad63d410bc5d47cc203df3a880e9324)
+    User: cloudera
+    Network Address: 127.0.0.1:49161
+    Default Db: stats_testing
+    Sql Statement: select t1.s, t2.s from t1 join t2 on (t1.id = t2.parent)
+    Plan:
+----------------
+Estimated Per-Host Requirements: Memory=2.09GB VCores=2
+
+PLAN FRAGMENT 0
+  PARTITION: UNPARTITIONED
+
+  4:EXCHANGE
+     cardinality: unavailable
+     per-host memory: unavailable
+     tuple ids: 0 1
+
+PLAN FRAGMENT 1
+  PARTITION: RANDOM
+
+  STREAM DATA SINK
+    EXCHANGE ID: 4
+    UNPARTITIONED
+
+  2:HASH JOIN
+  |  join op: INNER JOIN (BROADCAST)
+  |  hash predicates:
+  |    t1.id = t2.parent
+  |  cardinality: unavailable
+  |  per-host memory: 2.00GB
+  |  tuple ids: 0 1
+  |
+  |----3:EXCHANGE
+  |       cardinality: unavailable
+  |       per-host memory: 0B
+  |       tuple ids: 1
+  |
+  0:SCAN HDFS
+     table=stats_testing.t1 #partitions=1/1 size=33B
+     table stats: unavailable
+     column stats: unavailable
+     cardinality: unavailable
+     per-host memory: 32.00MB
+     tuple ids: 0
+
+PLAN FRAGMENT 2
+  PARTITION: RANDOM
+
+  STREAM DATA SINK
+    EXCHANGE ID: 3
+    UNPARTITIONED
+
+  1:SCAN HDFS
+     table=stats_testing.t2 #partitions=1/1 size=960.00KB
+     table stats: unavailable
+     column stats: unavailable
+     cardinality: unavailable
+     per-host memory: 96.00MB
+     tuple ids: 1
+----------------
+    Query Timeline: 20s670ms
+       - Start execution: 2.559ms (2.559ms)
+       - Planning finished: 23.587ms (21.27ms)
+       - Rows available: 666.199ms (642.612ms)
+       - First row fetched: 668.919ms (2.719ms)
+       - Unregister query: 20s668ms (20s000ms)
+  ImpalaServer:
+     - ClientFetchWaitTimer: 19s637ms
+     - RowMaterializationTimer: 167.121ms
+  Execution Profile 6540a03d4bee0691:4963d6269b210ebd:(Active: 837.815ms, % 
non-child: 0.00%)
+    Per Node Peak Memory Usage: impala-1.example.com:22000(7.42 MB)
+     - FinalizationTimer: 0ns
+    Coordinator Fragment:(Active: 195.198ms, % non-child: 0.00%)
+      MemoryUsage(500.0ms): 16.00 KB, 7.42 MB, 7.33 MB, 7.10 MB, 6.94 MB, 6.71 
MB, 6.56 MB, 6.40 MB, 6.17 MB, 6.02 MB, 5.79 MB, 5.63 MB, 5.48 MB, 5.25 MB, 
5.09 MB, 4.86 MB, 4.71 MB, 4.47 MB, 4.32 MB, 4.09 MB, 3.93 MB, 3.78 MB, 3.55 
MB, 3.39 MB, 3.16 MB, 3.01 MB, 2.78 MB, 2.62 MB, 2.39 MB, 2.24 MB, 2.08 MB, 
1.85 MB, 1.70 MB, 1.54 MB, 1.31 MB, 1.16 MB, 948.00 KB, 790.00 KB, 553.00 KB, 
395.00 KB, 237.00 KB
+      ThreadUsage(500.0ms): 1
+       - AverageThreadTokens: 1.00
+       - PeakMemoryUsage: 7.42 MB
+       - PrepareTime: 36.144us
+       - RowsProduced: 98.30K (98304)
+       - TotalCpuTime: 20s449ms
+       - TotalNetworkWaitTime: 191.630ms
+       - TotalStorageWaitTime: 0ns
+      CodeGen:(Active: 150.679ms, % non-child: 77.19%)
+         - CodegenTime: 0ns
+         - CompileTime: 139.503ms
+         - LoadTime: 10.7ms
+         - ModuleFileSize: 95.27 KB
+      EXCHANGE_NODE (id=4):(Active: 194.858ms, % non-child: 99.83%)
+         - BytesReceived: 2.33 MB
+         - ConvertRowBatchTime: 2.732ms
+         - DataArrivalWaitTime: 191.118ms
+         - DeserializeRowBatchTimer: 14.943ms
+         - FirstBatchArrivalWaitTime: 191.117ms
+         - PeakMemoryUsage: 7.41 MB
+         - RowsReturned: 98.30K (98304)
+         - RowsReturnedRate: 504.49 K/sec
+         - SendersBlockedTimer: 0ns
+         - SendersBlockedTotalTimer(*): 0ns
+    Averaged Fragment 1:(Active: 442.360ms, % non-child: 0.00%)
+      split sizes:  min: 33.00 B, max: 33.00 B, avg: 33.00 B, stddev: 0.00
+      completion times: min:443.720ms  max:443.720ms  mean: 443.720ms  
stddev:0ns
+      execution rates: min:74.00 B/sec  max:74.00 B/sec  mean:74.00 B/sec  
stddev:0.00 /sec
+      num instances: 1
+       - AverageThreadTokens: 1.00
+       - PeakMemoryUsage: 6.06 MB
+       - PrepareTime: 7.291ms
+       - RowsProduced: 98.30K (98304)
+       - TotalCpuTime: 784.259ms
+       - TotalNetworkWaitTime: 388.818ms
+       - TotalStorageWaitTime: 3.934ms
+      CodeGen:(Active: 312.862ms, % non-child: 70.73%)
+         - CodegenTime: 2.669ms
+         - CompileTime: 302.467ms
+         - LoadTime: 9.231ms
+         - ModuleFileSize: 95.27 KB
+      DataStreamSender (dst_id=4):(Active: 80.63ms, % non-child: 18.10%)
+         - BytesSent: 2.33 MB
+         - NetworkThroughput(*): 35.89 MB/sec
+         - OverallThroughput: 29.06 MB/sec
+         - PeakMemoryUsage: 5.33 KB
+         - SerializeBatchTime: 26.487ms
+         - ThriftTransmitTime(*): 64.814ms
+         - UncompressedRowBatchSize: 6.66 MB
+      HASH_JOIN_NODE (id=2):(Active: 362.25ms, % non-child: 3.92%)
+         - BuildBuckets: 1.02K (1024)
+         - BuildRows: 98.30K (98304)
+         - BuildTime: 12.622ms
+         - LoadFactor: 0.00
+         - PeakMemoryUsage: 6.02 MB
+         - ProbeRows: 3
+         - ProbeTime: 3.579ms
+         - RowsReturned: 98.30K (98304)
+         - RowsReturnedRate: 271.54 K/sec
+        EXCHANGE_NODE (id=3):(Active: 344.680ms, % non-child: 77.92%)
+           - BytesReceived: 1.15 MB
+           - ConvertRowBatchTime: 2.792ms
+           - DataArrivalWaitTime: 339.936ms
+           - DeserializeRowBatchTimer: 9.910ms
+           - FirstBatchArrivalWaitTime: 199.474ms
+           - PeakMemoryUsage: 156.00 KB
+           - RowsReturned: 98.30K (98304)
+           - RowsReturnedRate: 285.20 K/sec
+           - SendersBlockedTimer: 0ns
+           - SendersBlockedTotalTimer(*): 0ns
+      HDFS_SCAN_NODE (id=0):(Active: 13.616us, % non-child: 0.00%)
+         - AverageHdfsReadThreadConcurrency: 0.00
+         - AverageScannerThreadConcurrency: 0.00
+         - BytesRead: 33.00 B
+         - BytesReadLocal: 33.00 B
+         - BytesReadShortCircuit: 33.00 B
+         - NumDisksAccessed: 1
+         - NumScannerThreadsStarted: 1
+         - PeakMemoryUsage: 46.00 KB
+         - PerReadThreadRawHdfsThroughput: 287.52 KB/sec
+         - RowsRead: 3
+         - RowsReturned: 3
+         - RowsReturnedRate: 220.33 K/sec
+         - ScanRangesComplete: 1
+         - ScannerThreadsInvoluntaryContextSwitches: 26
+         - ScannerThreadsTotalWallClockTime: 55.199ms
+           - DelimiterParseTime: 2.463us
+           - MaterializeTupleTime(*): 1.226us
+           - ScannerThreadsSysTime: 0ns
+           - ScannerThreadsUserTime: 42.993ms
+         - ScannerThreadsVoluntaryContextSwitches: 1
+         - TotalRawHdfsReadTime(*): 112.86us
+         - TotalReadThroughput: 0.00 /sec
+    Averaged Fragment 2:(Active: 190.120ms, % non-child: 0.00%)
+      split sizes:  min: 960.00 KB, max: 960.00 KB, avg: 960.00 KB, stddev: 
0.00
+      completion times: min:191.736ms  max:191.736ms  mean: 191.736ms  
stddev:0ns
+      execution rates: min:4.89 MB/sec  max:4.89 MB/sec  mean:4.89 MB/sec  
stddev:0.00 /sec
+      num instances: 1
+       - AverageThreadTokens: 0.00
+       - PeakMemoryUsage: 906.33 KB
+       - PrepareTime: 3.67ms
+       - RowsProduced: 98.30K (98304)
+       - TotalCpuTime: 403.351ms
+       - TotalNetworkWaitTime: 34.999ms
+       - TotalStorageWaitTime: 108.675ms
+      CodeGen:(Active: 162.57ms, % non-child: 85.24%)
+         - CodegenTime: 3.133ms
+         - CompileTime: 148.316ms
+         - LoadTime: 12.317ms
+         - ModuleFileSize: 95.27 KB
+      DataStreamSender (dst_id=3):(Active: 70.620ms, % non-child: 37.14%)
+         - BytesSent: 1.15 MB
+         - NetworkThroughput(*): 23.30 MB/sec
+         - OverallThroughput: 16.23 MB/sec
+         - PeakMemoryUsage: 5.33 KB
+         - SerializeBatchTime: 22.69ms
+         - ThriftTransmitTime(*): 49.178ms
+         - UncompressedRowBatchSize: 3.28 MB
+      HDFS_SCAN_NODE (id=1):(Active: 118.839ms, % non-child: 62.51%)
+         - AverageHdfsReadThreadConcurrency: 0.00
+         - AverageScannerThreadConcurrency: 0.00
+         - BytesRead: 960.00 KB
+         - BytesReadLocal: 960.00 KB
+         - BytesReadShortCircuit: 960.00 KB
+         - NumDisksAccessed: 1
+         - NumScannerThreadsStarted: 1
+         - PeakMemoryUsage: 869.00 KB
+         - PerReadThreadRawHdfsThroughput: 130.21 MB/sec
+         - RowsRead: 98.30K (98304)
+         - RowsReturned: 98.30K (98304)
+         - RowsReturnedRate: 827.20 K/sec
+         - ScanRangesComplete: 15
+         - ScannerThreadsInvoluntaryContextSwitches: 34
+         - ScannerThreadsTotalWallClockTime: 189.774ms
+           - DelimiterParseTime: 15.703ms
+           - MaterializeTupleTime(*): 3.419ms
+           - ScannerThreadsSysTime: 1.999ms
+           - ScannerThreadsUserTime: 44.993ms
+         - ScannerThreadsVoluntaryContextSwitches: 118
+         - TotalRawHdfsReadTime(*): 7.199ms
+         - TotalReadThroughput: 0.00 /sec
+    Fragment 1:
+      Instance 6540a03d4bee0691:4963d6269b210ebf 
(host=impala-1.example.com:22000):(Active: 442.360ms, % non-child: 0.00%)
+        Hdfs split stats (&lt;volume id&gt;:&lt;# splits&gt;/&lt;split 
lengths&gt;): 0:1/33.00 B
+        MemoryUsage(500.0ms): 69.33 KB
+        ThreadUsage(500.0ms): 1
+         - AverageThreadTokens: 1.00
+         - PeakMemoryUsage: 6.06 MB
+         - PrepareTime: 7.291ms
+         - RowsProduced: 98.30K (98304)
+         - TotalCpuTime: 784.259ms
+         - TotalNetworkWaitTime: 388.818ms
+         - TotalStorageWaitTime: 3.934ms
+        CodeGen:(Active: 312.862ms, % non-child: 70.73%)
+           - CodegenTime: 2.669ms
+           - CompileTime: 302.467ms
+           - LoadTime: 9.231ms
+           - ModuleFileSize: 95.27 KB
+        DataStreamSender (dst_id=4):(Active: 80.63ms, % non-child: 18.10%)
+           - BytesSent: 2.33 MB
+           - NetworkThroughput(*): 35.89 MB/sec
+           - OverallThroughput: 29.06 MB/sec
+           - PeakMemoryUsage: 5.33 KB
+           - SerializeBatchTime: 26.487ms
+           - ThriftTransmitTime(*): 64.814ms
+           - UncompressedRowBatchSize: 6.66 MB
+        HASH_JOIN_NODE (id=2):(Active: 362.25ms, % non-child: 3.92%)
+          ExecOption: Build Side Codegen Enabled, Probe Side Codegen Enabled, 
Hash Table Built Asynchronously
+           - BuildBuckets: 1.02K (1024)
+           - BuildRows: 98.30K (98304)
+           - BuildTime: 12.622ms
+           - LoadFactor: 0.00
+           - PeakMemoryUsage: 6.02 MB
+           - ProbeRows: 3
+           - ProbeTime: 3.579ms
+           - RowsReturned: 98.30K (98304)
+           - RowsReturnedRate: 271.54 K/sec
+          EXCHANGE_NODE (id=3):(Active: 344.680ms, % non-child: 77.92%)
+             - BytesReceived: 1.15 MB
+             - ConvertRowBatchTime: 2.792ms
+             - DataArrivalWaitTime: 339.936ms
+             - DeserializeRowBatchTimer: 9.910ms
+             - FirstBatchArrivalWaitTime: 199.474ms
+             - PeakMemoryUsage: 156.00 KB
+             - RowsReturned: 98.30K (98304)
+             - RowsReturnedRate: 285.20 K/sec
+             - SendersBlockedTimer: 0ns
+             - SendersBlockedTotalTimer(*): 0ns
+        HDFS_SCAN_NODE (id=0):(Active: 13.616us, % non-child: 0.00%)
+          Hdfs split stats (&lt;volume id&gt;:&lt;# splits&gt;/&lt;split 
lengths&gt;): 0:1/33.00 B
+          Hdfs Read Thread Concurrency Bucket: 0:0% 1:0%
+          File Formats: TEXT/NONE:1
+          ExecOption: Codegen enabled: 1 out of 1
+           - AverageHdfsReadThreadConcurrency: 0.00
+           - AverageScannerThreadConcurrency: 0.00
+           - BytesRead: 33.00 B
+           - BytesReadLocal: 33.00 B
+           - BytesReadShortCircuit: 33.00 B
+           - NumDisksAccessed: 1
+           - NumScannerThreadsStarted: 1
+           - PeakMemoryUsage: 46.00 KB
+           - PerReadThreadRawHdfsThroughput: 287.52 KB/sec
+           - RowsRead: 3
+           - RowsReturned: 3
+           - RowsReturnedRate: 220.33 K/sec
+           - ScanRangesComplete: 1
+           - ScannerThreadsInvoluntaryContextSwitches: 26
+           - ScannerThreadsTotalWallClockTime: 55.199ms
+             - DelimiterParseTime: 2.463us
+             - MaterializeTupleTime(*): 1.226us
+             - ScannerThreadsSysTime: 0ns
+             - ScannerThreadsUserTime: 42.993ms
+           - ScannerThreadsVoluntaryContextSwitches: 1
+           - TotalRawHdfsReadTime(*): 112.86us
+           - TotalReadThroughput: 0.00 /sec
+    Fragment 2:
+      Instance 6540a03d4bee0691:4963d6269b210ec0 
(host=impala-1.example.com:22000):(Active: 190.120ms, % non-child: 0.00%)
+        Hdfs split stats (&lt;volume id&gt;:&lt;# splits&gt;/&lt;split 
lengths&gt;): 0:15/960.00 KB
+         - AverageThreadTokens: 0.00
+         - PeakMemoryUsage: 906.33 KB
+         - PrepareTime: 3.67ms
+         - RowsProduced: 98.30K (98304)
+         - TotalCpuTime: 403.351ms
+         - TotalNetworkWaitTime: 34.999ms
+         - TotalStorageWaitTime: 108.675ms
+        CodeGen:(Active: 162.57ms, % non-child: 85.24%)
+           - CodegenTime: 3.133ms
+           - CompileTime: 148.316ms
+           - LoadTime: 12.317ms
+           - ModuleFileSize: 95.27 KB
+        DataStreamSender (dst_id=3):(Active: 70.620ms, % non-child: 37.14%)
+           - BytesSent: 1.15 MB
+           - NetworkThroughput(*): 23.30 MB/sec
+           - OverallThroughput: 16.23 MB/sec
+           - PeakMemoryUsage: 5.33 KB
+           - SerializeBatchTime: 22.69ms
+           - ThriftTransmitTime(*): 49.178ms
+           - UncompressedRowBatchSize: 3.28 MB
+        HDFS_SCAN_NODE (id=1):(Active: 118.839ms, % non-child: 62.51%)
+          Hdfs split stats (&lt;volume id&gt;:&lt;# splits&gt;/&lt;split 
lengths&gt;): 0:15/960.00 KB
+          Hdfs Read Thread Concurrency Bucket: 0:0% 1:0%
+          File Formats: TEXT/NONE:15
+          ExecOption: Codegen enabled: 15 out of 15
+           - AverageHdfsReadThreadConcurrency: 0.00
+           - AverageScannerThreadConcurrency: 0.00
+           - BytesRead: 960.00 KB
+           - BytesReadLocal: 960.00 KB
+           - BytesReadShortCircuit: 960.00 KB
+           - NumDisksAccessed: 1
+           - NumScannerThreadsStarted: 1
+           - PeakMemoryUsage: 869.00 KB
+           - PerReadThreadRawHdfsThroughput: 130.21 MB/sec
+           - RowsRead: 98.30K (98304)
+           - RowsReturned: 98.30K (98304)
+           - RowsReturnedRate: 827.20 K/sec
+           - ScanRangesComplete: 15
+           - ScannerThreadsInvoluntaryContextSwitches: 34
+           - ScannerThreadsTotalWallClockTime: 189.774ms
+             - DelimiterParseTime: 15.703ms
+             - MaterializeTupleTime(*): 3.419ms
+             - ScannerThreadsSysTime: 1.999ms
+             - ScannerThreadsUserTime: 44.993ms
+           - ScannerThreadsVoluntaryContextSwitches: 118
+           - TotalRawHdfsReadTime(*): 7.199ms
+           - TotalReadThroughput: 0.00 /sec</codeblock>
+    </conbody>
+  </concept>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_float.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_float.xml b/docs/topics/impala_float.xml
index 51e3311..8ef1144 100644
--- a/docs/topics/impala_float.xml
+++ b/docs/topics/impala_float.xml
@@ -3,7 +3,7 @@
 <concept id="float">
 
   <title>FLOAT Data Type</title>
-  <titlealts><navtitle>FLOAT</navtitle></titlealts>
+  <titlealts audience="PDF"><navtitle>FLOAT</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -68,11 +68,11 @@ SELECT CAST(1000.5 AS FLOAT);
 
     <p conref="../shared/impala_common.xml#common/text_bulky"/>
 
-<!-- <p 
conref="/Content/impala_common_xi44078.xml#common/compatibility_blurb"/> -->
+<!-- <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> -->
 
     <p conref="../shared/impala_common.xml#common/internals_4_bytes"/>
 
-<!-- <p conref="/Content/impala_common_xi44078.xml#common/added_in_20"/> -->
+<!-- <p conref="../shared/impala_common.xml#common/added_in_20"/> -->
 
     <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_functions.xml b/docs/topics/impala_functions.xml
index 527744b..55a36dc 100644
--- a/docs/topics/impala_functions.xml
+++ b/docs/topics/impala_functions.xml
@@ -3,7 +3,7 @@
 <concept id="builtins">
 
   <title id="title_functions">Impala Built-In Functions</title>
-  <titlealts><navtitle>Built-In Functions</navtitle></titlealts>
+  <titlealts audience="PDF"><navtitle>Built-In Functions</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -17,9 +17,9 @@
 
   <conbody>
 
-    <draft-comment translate="no">
-Opportunity to conref some material between here and the "Functions" topic 
under "Schema Objects".
-</draft-comment>
+    <!-- To do:
+      Opportunity to conref some material between here and the "Functions" 
topic under "Schema Objects".
+    -->
 
     <p>
       Impala supports several categories of built-in functions. These 
functions let you perform mathematical
@@ -152,7 +152,7 @@ select max(height), avg(height) from census_data where age 
&gt; 20;
     <p rev="2.0.0">
       Analytic functions are a variation on aggregate functions. Instead of 
returning a single value, or an
       identical value for each group of rows, they can compute values that 
vary based on a <q>window</q> consisting
-      of of other rows around them in the result set.
+      of other rows around them in the result set.
     </p>
 
     <p outputclass="toc"/>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_functions_overview.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_functions_overview.xml 
b/docs/topics/impala_functions_overview.xml
index 26a4d35..0e3973b 100644
--- a/docs/topics/impala_functions_overview.xml
+++ b/docs/topics/impala_functions_overview.xml
@@ -2,7 +2,7 @@
 <concept id="functions">
 
   <title>Overview of Impala Functions</title>
-  <titlealts><navtitle>Functions</navtitle></titlealts>
+  <titlealts audience="PDF"><navtitle>Functions</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_grant.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_grant.xml b/docs/topics/impala_grant.xml
index ddbd39c..ca45c0a 100644
--- a/docs/topics/impala_grant.xml
+++ b/docs/topics/impala_grant.xml
@@ -3,21 +3,25 @@
 <concept rev="2.0.0" id="grant">
 
   <title>GRANT Statement (CDH 5.2 or higher only)</title>
-  <titlealts><navtitle>GRANT (CDH 5.2 or higher only)</navtitle></titlealts>
+  <titlealts audience="PDF"><navtitle>GRANT</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
       <data name="Category" value="DDL"/>
       <data name="Category" value="SQL"/>
+      <data name="Category" value="Security"/>
       <data name="Category" value="Sentry"/>
       <data name="Category" value="Roles"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
       <!-- Consider whether to go deeper into categories like Security for the 
Sentry-related statements. -->
     </metadata>
   </prolog>
 
   <conbody>
 
-    <p>
+    <p rev="2.0.0">
       <indexterm audience="Cloudera">GRANT statement</indexterm>
 <!-- Copied from Sentry docs. Turn into conref. I did some rewording for 
clarity. -->
       The <codeph>GRANT</codeph> statement grants roles or privileges on 
specified objects to groups. Only Sentry
@@ -69,8 +73,8 @@ object_type ::= TABLE | DATABASE | SERVER | URI
 
     <p rev="2.3.0 collevelauth">
       The ability to grant or revoke <codeph>SELECT</codeph> privilege on 
specific columns is available
-      in CDH 5.5 / Impala 2.3 and higher. <!--See <xref 
href="sg_hive_sql.xml#concept_c2q_4qx_p4/col_level_auth_sentry"/>
-      for details.-->
+      in CDH 5.5 / Impala 2.3 and higher. See <xref 
href="sg_hive_sql.xml#concept_c2q_4qx_p4/col_level_auth_sentry"/>
+      for details.
     </p>
 
 <!-- Turn compatibility info into a conref or series of conrefs. (In both 
GRANT and REVOKE.) -->

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_group_by.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_group_by.xml b/docs/topics/impala_group_by.xml
index 10b7de4..055ae2a 100644
--- a/docs/topics/impala_group_by.xml
+++ b/docs/topics/impala_group_by.xml
@@ -9,6 +9,8 @@
       <data name="Category" value="SQL"/>
       <data name="Category" value="Querying"/>
       <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
     </metadata>
   </prolog>
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_group_concat.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_group_concat.xml 
b/docs/topics/impala_group_concat.xml
index b2a7ff6..0971875 100644
--- a/docs/topics/impala_group_concat.xml
+++ b/docs/topics/impala_group_concat.xml
@@ -3,7 +3,7 @@
 <concept rev="1.2" id="group_concat">
 
   <title>GROUP_CONCAT Function</title>
-  <titlealts><navtitle>GROUP_CONCAT</navtitle></titlealts>
+  <titlealts audience="PDF"><navtitle>GROUP_CONCAT</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -11,6 +11,8 @@
       <data name="Category" value="Impala Functions"/>
       <data name="Category" value="Aggregate Functions"/>
       <data name="Category" value="Querying"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
     </metadata>
   </prolog>
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_hadoop.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_hadoop.xml b/docs/topics/impala_hadoop.xml
index a3700c6..7941a47 100644
--- a/docs/topics/impala_hadoop.xml
+++ b/docs/topics/impala_hadoop.xml
@@ -4,7 +4,16 @@
 
   <title>How Impala Fits Into the Hadoop Ecosystem</title>
   <titlealts audience="PDF"><navtitle>Role in the Hadoop 
Ecosystem</navtitle></titlealts>
-  
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Hadoop"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
 
   <conbody>
 
@@ -14,7 +23,143 @@
       ELT pipelines.
     </p>
 
-  
+    <p outputclass="toc inpage"/>
+  </conbody>
+
+  <concept id="intro_hive">
+
+    <title>How Impala Works with Hive</title>
+
+    <conbody>
+
+      <p>
+        A major Impala goal is to make SQL-on-Hadoop operations fast and 
efficient enough to appeal to new
+        categories of users and open up Hadoop to new types of use cases. 
Where practical, it makes use of existing
+        Apache Hive infrastructure that many Hadoop users already have in 
place to perform long-running,
+        batch-oriented SQL queries.
+      </p>
+
+      <p>
+        In particular, Impala keeps its table definitions in a traditional 
MySQL or PostgreSQL database known as
+        the <b>metastore</b>, the same database where Hive keeps this type of 
data. Thus, Impala can access tables
+        defined or loaded by Hive, as long as all columns use Impala-supported 
data types and the tables use
+        Impala-supported file formats and compression codecs.
+      </p>
+
+      <p>
+        The initial focus on query features and performance means that Impala 
can read more types of data with the
+        <codeph>SELECT</codeph> statement than it can write with the 
<codeph>INSERT</codeph> statement. To query
+        data using the Avro, RCFile, or SequenceFile <xref 
href="impala_file_formats.xml#file_formats">file
+        formats</xref>, you load the data using Hive.
+      </p>
+
+      <p rev="1.2.2">
+        The Impala query optimizer can also make use of <xref 
href="impala_perf_stats.xml#perf_table_stats">table
+        statistics</xref> and <xref 
href="impala_perf_stats.xml#perf_column_stats">column statistics</xref>.
+        Originally, you gathered this information with the <codeph>ANALYZE 
TABLE</codeph> statement in Hive; in
+        Impala 1.2.2 and higher, use the Impala <codeph><xref 
href="impala_compute_stats.xml#compute_stats">COMPUTE
+        STATS</xref></codeph> statement instead. <codeph>COMPUTE 
STATS</codeph> requires less setup, is more
+        reliable, and does not require switching back and forth between 
<cmdname>impala-shell</cmdname>
+        and the Hive shell.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="intro_metastore">
+
+    <title>Overview of Impala Metadata and the Metastore</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Hive"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        As discussed in <xref href="impala_hadoop.xml#intro_hive"/>, Impala 
maintains information about table
+        definitions in a central database known as the <b>metastore</b>. 
Impala also tracks other metadata for the
+        low-level characteristics of data files:
+      </p>
+
+      <ul>
+        <li>
+          The physical locations of blocks within HDFS.
+        </li>
+      </ul>
+
+      <p>
+        For tables with a large volume of data and/or many partitions, 
retrieving all the metadata for a table can
+        be time-consuming, taking minutes in some cases. Thus, each Impala 
node caches all of this metadata to
+        reuse for future queries against the same table.
+      </p>
+
+      <p rev="1.2">
+        If the table definition or the data in the table is updated, all other 
Impala daemons in the cluster must
+        receive the latest metadata, replacing the obsolete cached metadata, 
before issuing a query against that
+        table. In Impala 1.2 and higher, the metadata update is automatic, 
coordinated through the
+        <cmdname>catalogd</cmdname> daemon, for all DDL and DML statements 
issued through Impala. See
+        <xref href="impala_components.xml#intro_catalogd"/> for details.
+      </p>
+
+      <p>
+        For DDL and DML issued through Hive, or changes made manually to files 
in HDFS, you still use the
+        <codeph>REFRESH</codeph> statement (when new data files are added to 
existing tables) or the
+        <codeph>INVALIDATE METADATA</codeph> statement (for entirely new 
tables, or after dropping a table,
+        performing an HDFS rebalance operation, or deleting data files). 
Issuing <codeph>INVALIDATE
+        METADATA</codeph> by itself retrieves metadata for all the tables 
tracked by the metastore. If you know
+        that only specific tables have been changed outside of Impala, you can 
issue <codeph>REFRESH
+        <varname>table_name</varname></codeph> for each affected table to 
retrieve the latest metadata for
+        only those tables.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="intro_hdfs">
+
+    <title>How Impala Uses HDFS</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="HDFS"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        Impala uses the distributed filesystem HDFS as its primary data 
storage medium. Impala relies on the
+        redundancy provided by HDFS to guard against hardware or network 
outages on individual nodes. Impala table
+        data is physically represented as data files in HDFS, using familiar 
HDFS file formats and compression
+        codecs. When data files are present in the directory for a new table, 
Impala reads them all, regardless of
+        file name. New data is added in files with names controlled by Impala.
+      </p>
     </conbody>
   </concept>
 
+  <concept id="intro_hbase">
+
+    <title>How Impala Uses HBase</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="HBase"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        HBase is an alternative to HDFS as a storage medium for Impala data. 
It is a database storage system built
+        on top of HDFS, without built-in SQL support. Many Hadoop users 
already have it configured and store large
+        (often sparse) data sets in it. By defining tables in Impala and 
mapping them to equivalent tables in
+        HBase, you can query the contents of the HBase tables through Impala, 
and even perform join queries
+        including both Impala and HBase tables. See <xref 
href="impala_hbase.xml#impala_hbase"/> for details.
+      </p>
+    </conbody>
+  </concept>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_having.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_having.xml b/docs/topics/impala_having.xml
index 064a4a8..2de32bb 100644
--- a/docs/topics/impala_having.xml
+++ b/docs/topics/impala_having.xml
@@ -9,6 +9,8 @@
       <data name="Category" value="SQL"/>
       <data name="Category" value="Querying"/>
       <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
     </metadata>
   </prolog>

[17/23] incubator-impala git commit: Update all impala* files to the latest CDH 5.9/5.10 versions.

Reply via email to