http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_char.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_char.xml b/docs/topics/impala_char.xml new file mode 100644 index 0000000..0298d57 --- /dev/null +++ b/docs/topics/impala_char.xml @@ -0,0 +1,278 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="char" rev="2.0.0"> + + <title>CHAR Data Type (<keyword keyref="impala20"/> or higher only)</title> + <titlealts audience="PDF"><navtitle>CHAR</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Data Types"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p rev="2.0.0"> + <indexterm audience="Cloudera">CHAR data type</indexterm> + A fixed-length character type, padded with trailing spaces if necessary to achieve the specified length. If + values are longer than the specified length, Impala truncates any trailing characters. + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + + <p> + In the column definition of a <codeph>CREATE TABLE</codeph> statement: + </p> + +<codeblock><varname>column_name</varname> CHAR(<varname>length</varname>)</codeblock> + + <p> + The maximum length you can specify is 255. + </p> + + <p> + <b>Semantics of trailing spaces:</b> + </p> + + <ul> + <li> + When you store a <codeph>CHAR</codeph> value shorter than the specified length in a table, queries return + the value padded with trailing spaces if necessary; the resulting value has the same length as specified in + the column definition. 
+ </li> + + <li> + If you store a <codeph>CHAR</codeph> value containing trailing spaces in a table, those trailing spaces are + not stored in the data file. When the value is retrieved by a query, the result could have a different + number of trailing spaces. That is, the value includes however many spaces are needed to pad it to the + specified length of the column. + </li> + + <li> + If you compare two <codeph>CHAR</codeph> values that differ only in the number of trailing spaces, those + values are considered identical. + </li> + </ul> + + <p conref="../shared/impala_common.xml#common/partitioning_bad"/> + + <p conref="../shared/impala_common.xml#common/hbase_no"/> + + <p conref="../shared/impala_common.xml#common/parquet_blurb"/> + + <ul> + <li> + This type can be read from and written to Parquet files. + </li> + + <li> + There is no requirement for a particular level of Parquet. + </li> + + <li> + Parquet files generated by Impala and containing this type can be freely interchanged with other components + such as Hive and MapReduce. + </li> + + <li> + Any trailing spaces, whether implicitly or explicitly specified, are not written to the Parquet data files. + </li> + + <li> + Parquet data files might contain values that are longer than allowed by the + <codeph>CHAR(<varname>n</varname>)</codeph> length limit. Impala ignores any extra trailing characters when + it processes those values during a query. + </li> + </ul> + + <p conref="../shared/impala_common.xml#common/text_blurb"/> + + <p> + Text data files might contain values that are longer than allowed for a particular + <codeph>CHAR(<varname>n</varname>)</codeph> column. Any extra trailing characters are ignored when Impala + processes those values during a query. Text data files can also contain values that are shorter than the + defined length limit, and Impala pads them with trailing spaces up to the specified length. 
Any text data + files produced by Impala <codeph>INSERT</codeph> statements do not include any trailing blanks for + <codeph>CHAR</codeph> columns. + </p> + + <p><b>Avro considerations:</b></p> + <p conref="../shared/impala_common.xml#common/avro_2gb_strings"/> + + <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> + + <p> + This type is available using Impala 2.0 or higher under CDH 4, or with Impala on CDH 5.2 or higher. There are + no compatibility issues with other components when exchanging data files or running Impala on CDH 4. + </p> + + <p> + Some other database systems make the length specification optional. For Impala, the length is required. + </p> + +<!-- +<p> +The Impala maximum length is larger than for the <codeph>CHAR</codeph> data type in Hive. +If a Hive query encounters a <codeph>CHAR</codeph> value longer than 255 during processing, +it silently treats the value as length 255. +</p> +--> + + <p conref="../shared/impala_common.xml#common/internals_max_bytes"/> + + <p conref="../shared/impala_common.xml#common/added_in_20"/> + + <p conref="../shared/impala_common.xml#common/column_stats_constant"/> + +<!-- Seems like a logical design decision but don't think it's currently implemented like this. +<p> +Because both the maximum and average length are always known and always the same for +any given <codeph>CHAR(<varname>n</varname>)</codeph> column, those fields are always filled +in for <codeph>SHOW COLUMN STATS</codeph> output, even before you run +<codeph>COMPUTE STATS</codeph> on the table. +</p> +--> + + <p conref="../shared/impala_common.xml#common/udf_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p> + These examples show how trailing spaces are not considered significant when comparing or processing + <codeph>CHAR</codeph> values. <codeph>CAST()</codeph> truncates any longer string to fit within the defined + length. 
If a <codeph>CHAR</codeph> value is shorter than the specified length, it is padded on the right with + spaces until it matches the specified length. Therefore, <codeph>LENGTH()</codeph> represents the length + including any trailing spaces, and <codeph>CONCAT()</codeph> also treats the column value as if it has + trailing spaces. + </p> + +<codeblock>select cast('x' as char(4)) = cast('x ' as char(4)) as "unpadded equal to padded"; ++--------------------------+ +| unpadded equal to padded | ++--------------------------+ +| true | ++--------------------------+ + +create table char_length(c char(3)); +insert into char_length values (cast('1' as char(3))), (cast('12' as char(3))), (cast('123' as char(3))), (cast('123456' as char(3))); +select concat("[",c,"]") as c, length(c) from char_length; ++-------+-----------+ +| c | length(c) | ++-------+-----------+ +| [1 ] | 3 | +| [12 ] | 3 | +| [123] | 3 | +| [123] | 3 | ++-------+-----------+ +</codeblock> + + <p> + This example shows a case where data values are known to have a specific length, where <codeph>CHAR</codeph> + is a logical data type to use. +<!-- +Because all the <codeph>CHAR</codeph> values have a constant predictable length, +Impala can efficiently analyze how best to use these values in join queries, +aggregation queries, and other contexts where column length is significant. +--> + </p> + +<codeblock>create table addresses + (id bigint, + street_name string, + state_abbreviation char(2), + country_abbreviation char(2)); +</codeblock> + + <p> + The following example shows how values written by Impala do not physically include the trailing spaces. It + creates a table using text format, with <codeph>CHAR</codeph> values much shorter than the declared length, + and then prints the resulting data file to show that the delimited values are not separated by spaces. The + same behavior applies to binary-format Parquet data files. 
+ </p> + +<codeblock>create table char_in_text (a char(20), b char(30), c char(40)) + row format delimited fields terminated by ','; + +insert into char_in_text values (cast('foo' as char(20)), cast('bar' as char(30)), cast('baz' as char(40))), (cast('hello' as char(20)), cast('goodbye' as char(30)), cast('aloha' as char(40))); + +-- Running this Linux command inside impala-shell using the ! shortcut. +!hdfs dfs -cat 'hdfs://127.0.0.1:8020/user/hive/warehouse/impala_doc_testing.db/char_in_text/*.*'; +foo,bar,baz +hello,goodbye,aloha +</codeblock> + + <p> + The following example further illustrates the treatment of spaces. It replaces the contents of the previous + table with some values including leading spaces, trailing spaces, or both. Any leading spaces are preserved + within the data file, but trailing spaces are discarded. Then when the values are retrieved by a query, the + leading spaces are retrieved verbatim while any necessary trailing spaces are supplied by Impala. + </p> + +<codeblock>insert overwrite char_in_text values (cast('trailing ' as char(20)), cast(' leading and trailing ' as char(30)), cast(' leading' as char(40))); +!hdfs dfs -cat 'hdfs://127.0.0.1:8020/user/hive/warehouse/impala_doc_testing.db/char_in_text/*.*'; +trailing, leading and trailing, leading + +select concat('[',a,']') as a, concat('[',b,']') as b, concat('[',c,']') as c from char_in_text; ++------------------------+----------------------------------+--------------------------------------------+ +| a | b | c | ++------------------------+----------------------------------+--------------------------------------------+ +| [trailing ] | [ leading and trailing ] | [ leading ] | ++------------------------+----------------------------------+--------------------------------------------+ +</codeblock> + + <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> + + <p> + Because the blank-padding behavior requires allocating the maximum length for each value in memory, for + 
scalability reasons avoid declaring <codeph>CHAR</codeph> columns that are much longer than typical values in + that column. + </p> + + <p conref="../shared/impala_common.xml#common/blobs_are_strings"/> + + <p> + When an expression compares a <codeph>CHAR</codeph> with a <codeph>STRING</codeph> or + <codeph>VARCHAR</codeph>, the <codeph>CHAR</codeph> value is implicitly converted to <codeph>STRING</codeph> + first, with trailing spaces preserved. + </p> + +<codeblock>select cast("foo " as char(5)) = 'foo' as "char equal to string"; ++----------------------+ +| char equal to string | ++----------------------+ +| false | ++----------------------+ +</codeblock> + + <p> + This behavior differs from other popular database systems. To get the expected result of + <codeph>TRUE</codeph>, cast the expressions on both sides to <codeph>CHAR</codeph> values of the appropriate + length: + </p> + +<codeblock>select cast("foo " as char(5)) = cast('foo' as char(3)) as "char equal to string"; ++----------------------+ +| char equal to string | ++----------------------+ +| true | ++----------------------+ +</codeblock> + + <p> + This behavior is subject to change in future releases. + </p> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_string.xml#string"/>, <xref href="impala_varchar.xml#varchar"/>, + <xref href="impala_literals.xml#string_literals"/>, + <xref href="impala_string_functions.xml#string_functions"/> + </p> + </conbody> +</concept>
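As a quick sanity check on the CHAR(n) semantics described in impala_char.xml above (truncation, trailing-space stripping on write, padding on read, space-insensitive comparison), here is a small Python model; the helper names are invented for illustration and do not correspond to any Impala API:

```python
# Illustrative model of the documented CHAR(n) behavior: values longer
# than n are truncated, trailing spaces are not written to data files,
# and reads pad the stored value back out to exactly n characters.

def char_store(value: str, n: int) -> str:
    # Truncate to the declared length, then drop trailing spaces,
    # which Impala does not write to the data file.
    return value[:n].rstrip(" ")

def char_read(stored: str, n: int) -> str:
    # Pad with trailing spaces so the result is exactly n characters.
    return stored.ljust(n)

def char_equal(a: str, b: str) -> bool:
    # CHAR comparison treats values differing only in trailing spaces
    # as identical.
    return a.rstrip(" ") == b.rstrip(" ")

# Mirrors the char_length example above: a CHAR(3) column.
for v in ("1", "12", "123", "123456"):
    out = char_read(char_store(v, 3), 3)
    print("[%s]" % out, len(out))
```

This reproduces the `[1  ] 3 / [12 ] 3 / [123] 3 / [123] 3` pattern shown in the `char_length` example, and `char_equal("x   ", "x")` mirrors the `unpadded equal to padded` query returning true.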
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_cluster_sizing.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_cluster_sizing.xml b/docs/topics/impala_cluster_sizing.xml new file mode 100644 index 0000000..382f68c --- /dev/null +++ b/docs/topics/impala_cluster_sizing.xml @@ -0,0 +1,353 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="cluster_sizing"> + + <title>Cluster Sizing Guidelines for Impala</title> + <titlealts audience="PDF"><navtitle>Cluster Sizing</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Clusters"/> + <data name="Category" value="Planning"/> + <data name="Category" value="Sizing"/> + <data name="Category" value="Deploying"/> + <!-- Hoist by my own petard. Memory is an important theme of this topic but that's in a <section> title. --> + <data name="Category" value="Sectionated Pages"/> + <data name="Category" value="Memory"/> + <data name="Category" value="Scalability"/> + <data name="Category" value="Proof of Concept"/> + <data name="Category" value="Requirements"/> + <data name="Category" value="Guidelines"/> + <data name="Category" value="Best Practices"/> + <data name="Category" value="Administrators"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">cluster sizing</indexterm> + This document provides a very rough guideline to estimate the size of a cluster needed for a specific + customer application. You can use this information when planning how much and what type of hardware to + acquire for a new cluster, or when adding Impala workloads to an existing cluster. + </p> + + <note> + Before making purchase or deployment decisions, consult your Cloudera representative to verify the + conclusions about hardware requirements based on your data volume and workload. 
+ </note> + +<!-- <p outputclass="toc inpage"/> --> + + <p> + Always use hosts with identical specifications and capacities for all the nodes in the cluster. Currently, + Impala divides the work evenly between cluster nodes, regardless of their exact hardware configuration. + Because work can be distributed in different ways for different queries, if some hosts are overloaded + compared to others in terms of CPU, memory, I/O, or network, you might experience inconsistent performance + and overall slowness. + </p> + + <p> + For analytic workloads with star/snowflake schemas, and using consistent hardware for all nodes (64 GB RAM, + 12 x 2 TB hard drives, 2x E5-2630L 12 cores total, 10 GB network), the following table estimates the number of + DataNodes needed in the cluster based on data size and the number of concurrent queries, for workloads + similar to TPC-DS benchmark queries: + </p> + + <table> + <title>Cluster size estimation based on the number of concurrent queries and data size with a 20 second average query response time</title> + <tgroup cols="6"> + <colspec colnum="1" colname="col1"/> + <colspec colnum="2" colname="col2"/> + <colspec colnum="3" colname="col3"/> + <colspec colnum="4" colname="col4"/> + <colspec colnum="5" colname="col5"/> + <colspec colnum="6" colname="col6"/> + <thead> + <row> + <entry> + Data Size + </entry> + <entry> + 1 query + </entry> + <entry> + 10 queries + </entry> + <entry> + 100 queries + </entry> + <entry> + 1000 queries + </entry> + <entry> + 2000 queries + </entry> + </row> + </thead> + <tbody> + <row> + <entry> + <b>250 GB</b> + </entry> + <entry> + 2 + </entry> + <entry> + 2 + </entry> + <entry> + 5 + </entry> + <entry> + 35 + </entry> + <entry> + 70 + </entry> + </row> + <row> + <entry> + <b>500 GB</b> + </entry> + <entry> + 2 + </entry> + <entry> + 2 + </entry> + <entry> + 10 + </entry> + <entry> + 70 + </entry> + <entry> + 135 + </entry> + </row> + <row> + <entry> + <b>1 TB</b> + </entry> + <entry> + 2 + </entry> +
<entry> + 2 + </entry> + <entry> + 15 + </entry> + <entry> + 135 + </entry> + <entry> + 270 + </entry> + </row> + <row> + <entry> + <b>15 TB</b> + </entry> + <entry> + 2 + </entry> + <entry> + 20 + </entry> + <entry> + 200 + </entry> + <entry> + N/A + </entry> + <entry> + N/A + </entry> + </row> + <row> + <entry> + <b>30 TB</b> + </entry> + <entry> + 4 + </entry> + <entry> + 40 + </entry> + <entry> + 400 + </entry> + <entry> + N/A + </entry> + <entry> + N/A + </entry> + </row> + <row> + <entry> + <b>60 TB</b> + </entry> + <entry> + 8 + </entry> + <entry> + 80 + </entry> + <entry> + 800 + </entry> + <entry> + N/A + </entry> + <entry> + N/A + </entry> + </row> + </tbody> + </tgroup> + </table> + + <section id="sizing_factors"> + + <title>Factors Affecting Scalability</title> + + <p> + A typical analytic workload (TPC-DS style queries) using recommended hardware is usually CPU-bound. Each + node can process roughly 1.6 GB/sec. Both CPU-bound and disk-bound workloads can scale almost linearly with + cluster size. However, for some workloads, the scalability might be bounded by the network, or even by + memory. + </p> + + <p> + If the workload is already network-bound (on a 10 GB network), increasing the cluster size won't reduce + the network load; in fact, a larger cluster could increase network traffic because some queries involve + <q>broadcast</q> operations to all DataNodes. Therefore, boosting the cluster size does not improve query + throughput in a network-constrained environment. + </p> + + <p> + Let's look at a memory-bound workload. A workload is memory-bound if Impala cannot run any additional + concurrent queries because all memory allocated has already been consumed, but neither CPU, disk, nor + network is saturated yet. This can happen because currently Impala uses only a single core per node to + process join and aggregation queries.
For a node with 128 GB of RAM, if a join node takes 50 GB, the system + cannot run more than 2 such queries at the same time. + </p> + + <p> + Therefore, at most 2 cores are used. Throughput can still scale almost linearly even for a memory-bound + workload. It's just that the CPU will not be saturated. Per-node throughput will be lower than 1.6 + GB/sec. Consider increasing the memory per node. + </p> + + <p> + As long as the workload is not network- or memory-bound, we can use the 1.6 GB/second per node as the + throughput estimate. + </p> + </section> + + <section id="sizing_details"> + + <title>A More Precise Approach</title> + + <p> + A more precise sizing estimate would require not only queries per minute (QPM), but also an average data + size scanned per query (D). With the proper partitioning strategy, D is usually a fraction of the total + data size. The following equation can be used as a rough guide to estimate the number of nodes (N) needed: + </p> + +<codeblock>Eq 1: N > QPM * D / 100 GB +</codeblock> + + <p> + Here is an example. Suppose, on average, a query scans 50 GB of data and the average response time is + required to be 15 seconds or less when there are 100 concurrent queries. The QPM is 100/15*60 = 400. We can + estimate the number of nodes using the equation above. + </p> + +<codeblock>N > QPM * D / 100 GB +N > 400 * 50 GB / 100 GB +N > 200 +</codeblock> + + <p> + Because this figure is a rough estimate, the corresponding number of nodes could be between 100 and 500. + </p> + + <p> + Depending on the complexity of the query, the processing rate of a query might change. If the query has more + joins, aggregation functions, or CPU-intensive functions such as string processing or complex UDFs, the + processing rate will be lower than 1.6 GB/second per node. On the other hand, if the query only does scanning + and filtering on numbers, the processing rate can be higher.
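As a sanity check on the Eq 1 arithmetic above, here is a short Python sketch (illustrative only; the function names are invented for this note, and the 100 GB divisor comes from the section's assumed per-node throughput):

```python
# Sketch of Eq 1 from "A More Precise Approach". Not Impala code;
# helper names are invented for illustration.

def queries_per_minute(concurrent_queries: int, avg_response_sec: float) -> float:
    # Sustained QPM for a target concurrency and response time.
    return concurrent_queries * 60 / avg_response_sec

def min_nodes(qpm: float, scanned_gb_per_query: float) -> float:
    # Eq 1: N > QPM * D / 100 GB -- a rough lower bound on node count.
    return qpm * scanned_gb_per_query / 100

qpm = queries_per_minute(100, 15)  # 100 concurrent queries, 15 s each -> 400 QPM
nodes = min_nodes(qpm, 50)         # each query scans D = 50 GB -> N > 200
print(qpm, nodes)
```

This reproduces the worked example: 400 QPM and more than 200 nodes, which (given the roughness of the estimate) the text widens to a range of 100 to 500.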
+ </p> + </section> + + <section id="sizing_mem_estimate"> + + <title>Estimating Memory Requirements</title> + <!-- + <prolog> + <metadata> + <data name="Category" value="Memory"/> + </metadata> + </prolog> + --> + + <p> + Impala can handle joins between multiple large tables. Make sure that statistics are collected for all the + joined tables, using the <codeph><xref href="impala_compute_stats.xml#compute_stats">COMPUTE + STATS</xref></codeph> statement. However, joining big tables does consume more memory. Follow the steps + below to calculate the minimum memory requirement. + </p> + + <p> + Suppose you are running the following join: + </p> + +<codeblock>select a.*, b.col_1, b.col_2, ... b.col_n +from a, b +where a.key = b.key +and b.col_1 in (1,2,4...) +and b.col_4 in (....); +</codeblock> + + <p> + And suppose table <codeph>B</codeph> is smaller than table <codeph>A</codeph> (but still a large table). + </p> + + <p> + The memory requirement for the query is that the right-hand table (<codeph>B</codeph>), after decompression, + filtering (<codeph>b.col_1 in ... and b.col_4 in ...</codeph>), and projection (keeping only the referenced + columns), must be less than the total memory of the entire cluster. + </p> + +<codeblock>Cluster Total Memory Requirement = Size of the smaller table * + selectivity factor from the predicate * + projection factor * compression ratio +</codeblock> + + <p> + In this case, assume that table <codeph>B</codeph> is 100 TB in Parquet format with 200 columns. The + predicate on <codeph>B</codeph> (<codeph>b.col_1 in ... and b.col_4 in ...</codeph>) will select only 10% of + the rows from <codeph>B</codeph>, and the projection keeps only 5 of the 200 columns. + Snappy compression typically gives about 3 times compression, so we estimate a 3x compression factor.
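Under the worked example's assumptions (10% selectivity, 5 of 200 columns projected, 3x compression), the formula can be sketched in Python as follows (illustrative only; the helper name is invented for this note):

```python
# Sketch of the "Cluster Total Memory Requirement" formula above.
# The 10% selectivity, 5/200 projection factor, and 3x compression
# figures are the worked example's assumptions, not Impala constants.

def cluster_memory_requirement_tb(table_size_tb: float, selectivity: float,
                                  projection_factor: float,
                                  compression_ratio: float) -> float:
    # Size of the smaller table * selectivity * projection * compression.
    return table_size_tb * selectivity * projection_factor * compression_ratio

needed_tb = cluster_memory_requirement_tb(100, 0.10, 5 / 200, 3)
print(round(needed_tb, 2))  # 0.75 TB, i.e. 750 GB

# A 10-node cluster with 128 GB per node and 80% of memory given to Impala:
available_gb = 10 * 128 * 0.80
print(available_gb, available_gb >= needed_tb * 1000)
```

The available memory (about 1 TB) exceeds the 750 GB requirement, matching the conclusion drawn in the text below.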
+ </p> + +<codeblock>Cluster Total Memory Requirement = Size of the smaller table * + selectivity factor from the predicate * + projection factor * compression ratio + = 100 TB * 10% * 5/200 * 3 + = 0.75 TB + = 750 GB +</codeblock> + + <p> + So, if you have a 10-node cluster where each node has 128 GB of RAM and you give 80% of it to Impala, you + have about 1 TB of usable memory for Impala, which is more than 750 GB. Therefore, your cluster can handle + join queries of this magnitude. + </p> + </section> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_cm_installation.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_cm_installation.xml b/docs/topics/impala_cm_installation.xml new file mode 100644 index 0000000..2cc2ac5 --- /dev/null +++ b/docs/topics/impala_cm_installation.xml @@ -0,0 +1,56 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="cm_installation"> + + <title>Installing Impala with Cloudera Manager</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Installing"/> + <data name="Category" value="Cloudera Manager"/> + <data name="Category" value="Administrators"/> + </metadata> + </prolog> + + <conbody> + + <p> + Before installing Impala through the Cloudera Manager interface, make sure all applicable nodes have the + appropriate hardware configuration and levels of operating system and CDH. See + <xref href="impala_prereqs.xml#prereqs"/> for details. + </p> + + <note rev="1.2.0"> + <p rev="1.2.0"> + To install the latest Impala under CDH 4, upgrade Cloudera Manager to 4.8 or higher. Cloudera Manager 4.8 is + the first release that can manage the Impala catalog service introduced in Impala 1.2.
Cloudera Manager 4.8 + requires this service to be present, so if you upgrade to Cloudera Manager 4.8, also upgrade Impala to the + most recent version at the same time. +<!-- Not so relevant now for 1.1.1, but maybe someday we'll capture all this history in a compatibility grid. + Upgrade to Cloudera Manager 4.6.2 or higher to enable Cloudera Manager to + handle access control for the Impala web UI, available by default through + port 25000 on each Impala host. + --> + </p> + </note> + + <p> + For information on installing Impala in a Cloudera Manager-managed environment, see + <xref audience="integrated" href="cm_ig_install_impala.xml"/><xref audience="standalone" href="http://www.cloudera.com/documentation/enterprise/latest/topics/cm_ig_install_impala.html" scope="external" format="html">Installing Impala</xref>. + </p> + + <p> + Managing your Impala installation through Cloudera Manager has a number of advantages. For example, when you + make configuration changes to CDH components using Cloudera Manager, it automatically applies changes to the + copies of configuration files, such as <codeph>hive-site.xml</codeph>, that Impala keeps under + <filepath>/etc/impala/conf</filepath>. It also sets up the Hive Metastore service that is required for + Impala running under CDH 4.1. + </p> + + <p> + In some cases, depending on the level of Impala, CDH, and Cloudera Manager, you might need to add particular + component configuration details in some of the free-form option fields on the Impala configuration pages + within Cloudera Manager. 
<ph conref="../shared/impala_common.xml#common/safety_valve"/> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_comments.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_comments.xml b/docs/topics/impala_comments.xml new file mode 100644 index 0000000..07531dc --- /dev/null +++ b/docs/topics/impala_comments.xml @@ -0,0 +1,53 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="comments"> + + <title>Comments</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">comments (SQL)</indexterm> + Impala supports the familiar styles of SQL comments: + </p> + + <ul> + <li> + All text from a <codeph>--</codeph> sequence to the end of the line is considered a comment and ignored. + This type of comment can occur on a single line by itself, or after all or part of a statement. + </li> + + <li> + All text from a <codeph>/*</codeph> sequence to the next <codeph>*/</codeph> sequence is considered a + comment and ignored. This type of comment can stretch over multiple lines. This type of comment can occur + on one or more lines by itself, in the middle of a statement, or before or after a statement. + </li> + </ul> + + <p> + For example: + </p> + +<codeblock>-- This line is a comment about a table. +create table ...; + +/* +This is a multi-line comment about a query. +*/ +select ...; + +select * from t /* This is an embedded comment about a query. */ where ...; + +select * from t -- This is a trailing comment within a multi-line command. +where ...; +</codeblock> + </conbody> +</concept>
