[05/51] [partial] incubator-impala git commit: IMPALA-3398: Add docs to main Impala branch.

jbapple Thu, 17 Nov 2016 15:12:33 -0800

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_string.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string.xml b/docs/topics/impala_string.xml
new file mode 100644
index 0000000..1be9d98
--- /dev/null
+++ b/docs/topics/impala_string.xml
@@ -0,0 +1,180 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="string">
+
+  <title>STRING Data Type</title>
+  <titlealts audience="PDF"><navtitle>STRING</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      A data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER 
TABLE</codeph> statements.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+    <p>
+      In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+    </p>
+
+<codeblock><varname>column_name</varname> STRING</codeblock>
+
+    <p>
+      <b>Length:</b> Maximum of 32,767 bytes. Do not use any length constraint 
when declaring
+      <codeph>STRING</codeph> columns, as you might be familiar with from 
<codeph>VARCHAR</codeph>,
+      <codeph>CHAR</codeph>, or similar column types from relational database 
systems. <ph rev="2.0.0">If you do
+      need to manipulate string values with precise or maximum lengths, in 
Impala 2.0 and higher you can declare
+      columns as <codeph>VARCHAR(<varname>max_length</varname>)</codeph> or
+      <codeph>CHAR(<varname>length</varname>)</codeph>, but for best 
performance use <codeph>STRING</codeph>
+      where practical.</ph>
+    </p>
+
+    <p>
+      <b>Character sets:</b> For full support in all Impala subsystems, 
restrict string values to the ASCII
+      character set. Although some UTF-8 character data can be stored in 
Impala and retrieved through queries, UTF-8 strings
+      containing non-ASCII characters are not guaranteed to work properly in 
combination with many SQL aspects,
+      including but not limited to:
+    </p>
+    <ul>
+      <li>
+        String manipulation functions.
+      </li>
+      <li>
+        Comparison operators.
+      </li>
+      <li>
+        The <codeph>ORDER BY</codeph> clause.
+      </li>
+      <li>
+        Values in partition key columns.
+      </li>
+    </ul>
+
+    <p>
+      For any national language aspects such as
+      collation order or interpreting extended ASCII variants such as 
ISO-8859-1 or ISO-8859-2 encodings, Impala
+      does not include such metadata with the table definition. If you need to 
sort, manipulate, or display data
+      depending on those national language characteristics of string data, use 
logic on the application side.
+    </p>
+
+    <p>
+      <b>Conversions:</b>
+    </p>
+
+    <ul>
+      <li>
+        <p>
+          Impala does not automatically convert <codeph>STRING</codeph> to any 
numeric type. Impala does
+          automatically convert <codeph>STRING</codeph> to 
<codeph>TIMESTAMP</codeph> if the value matches one of
+          the accepted <codeph>TIMESTAMP</codeph> formats; see <xref 
href="impala_timestamp.xml#timestamp"/> for
+          details.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          You can use <codeph>CAST()</codeph> to convert 
<codeph>STRING</codeph> values to
+          <codeph>TINYINT</codeph>, <codeph>SMALLINT</codeph>, 
<codeph>INT</codeph>, <codeph>BIGINT</codeph>,
+          <codeph>FLOAT</codeph>, <codeph>DOUBLE</codeph>, or 
<codeph>TIMESTAMP</codeph>.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          You cannot directly cast a <codeph>STRING</codeph> value to 
<codeph>BOOLEAN</codeph>. You can use a
+          <codeph>CASE</codeph> expression to evaluate string values such as 
<codeph>'T'</codeph>,
+          <codeph>'true'</codeph>, and so on and return Boolean 
<codeph>true</codeph> and <codeph>false</codeph>
+          values as appropriate.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          You can cast a <codeph>BOOLEAN</codeph> value to 
<codeph>STRING</codeph>, returning <codeph>'1'</codeph>
+          for <codeph>true</codeph> values and <codeph>'0'</codeph> for 
<codeph>false</codeph> values.
+        </p>
+      </li>
+    </ul>
+
+    <p conref="../shared/impala_common.xml#common/partitioning_blurb"/>
+
+    <p>
+      Although it might be convenient to use <codeph>STRING</codeph> columns 
for partition keys, even when those
+      columns contain numbers, for performance and scalability it is much 
better to use numeric columns as
+      partition keys whenever practical. Although the underlying HDFS 
directory name might be the same in either
+      case, the in-memory storage for the partition key columns is more 
compact, and computations are faster, if
+      partition key columns such as <codeph>YEAR</codeph>, 
<codeph>MONTH</codeph>, <codeph>DAY</codeph> and so on
+      are declared as <codeph>INT</codeph>, <codeph>SMALLINT</codeph>, and so 
on.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/zero_length_strings"/>
+
+<!-- <p conref="../shared/impala_common.xml#common/hbase_blurb"/> -->
+
+<!-- <p conref="../shared/impala_common.xml#common/parquet_blurb"/> -->
+
+    <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+    <p><b>Avro considerations:</b></p>
+    <p conref="../shared/impala_common.xml#common/avro_2gb_strings"/>
+
+<!-- <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> -->
+
+<!-- <p conref="../shared/impala_common.xml#common/internals_blurb"/> -->
+
+<!-- <p conref="../shared/impala_common.xml#common/added_in_20"/> -->
+
+    <p conref="../shared/impala_common.xml#common/column_stats_variable"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      The following examples demonstrate double-quoted and single-quoted 
string literals, and required escaping for
+      quotation marks within string literals:
+    </p>
+
+<codeblock>SELECT 'I am a single-quoted string';
+SELECT "I am a double-quoted string";
+SELECT 'I\'m a single-quoted string with an apostrophe';
+SELECT "I\'m a double-quoted string with an apostrophe";
+SELECT 'I am a "short" single-quoted string containing quotes';
+SELECT "I am a \"short\" double-quoted string containing quotes";
+</codeblock>
+
+    <p>
+      The following examples demonstrate calls to string manipulation 
functions to concatenate strings, convert
+      numbers to strings, or pull out substrings:
+    </p>
+
+<codeblock>SELECT CONCAT("Once upon a time, there were ", CAST(3 AS STRING), ' 
little pigs.');
+SELECT SUBSTR("hello world",7,5);
+</codeblock>
+
+    <p>
+      The following examples show how to perform operations on 
<codeph>STRING</codeph> columns within a table:
+    </p>
+
+<codeblock>CREATE TABLE t1 (s1 STRING, s2 STRING);
+INSERT INTO t1 VALUES ("hello", 'world'), (CAST(7 AS STRING), "wonders");
+SELECT s1, s2, length(s1) FROM t1 WHERE s2 LIKE 'w%';
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_literals.xml#string_literals"/>, <xref 
href="impala_char.xml#char"/>,
+      <xref href="impala_varchar.xml#varchar"/>, <xref 
href="impala_string_functions.xml#string_functions"/>,
+      <xref href="impala_datetime_functions.xml#datetime_functions"/>
+    </p>
+  </conbody>
+</concept>


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_string_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string_functions.xml 
b/docs/topics/impala_string_functions.xml
new file mode 100644
index 0000000..8f8636f
--- /dev/null
+++ b/docs/topics/impala_string_functions.xml
@@ -0,0 +1,926 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD 
DITA Concept//EN" "concept.dtd">
+<concept id="string_functions">
+
+  <title>Impala String Functions</title>
+  <titlealts audience="PDF"><navtitle>String Functions</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Querying"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p rev="2.0.0">
+      String functions are classified as those primarily accepting or 
returning <codeph>STRING</codeph>,
+      <codeph>VARCHAR</codeph>, or <codeph>CHAR</codeph> data types, for 
example to measure the length of a string
+      or concatenate two strings together.
+      <ul>
+        <li>
+          All the functions that accept <codeph>STRING</codeph> arguments also 
accept the <codeph>VARCHAR</codeph>
+          and <codeph>CHAR</codeph> types introduced in Impala 2.0.
+        </li>
+
+        <li>
+          Whenever <codeph>VARCHAR</codeph> or <codeph>CHAR</codeph> values 
are passed to a function that returns a
+          string value, the return type is normalized to 
<codeph>STRING</codeph>. For example, a call to
+          <codeph>concat()</codeph> with a mix of <codeph>STRING</codeph>, 
<codeph>VARCHAR</codeph>, and
+          <codeph>CHAR</codeph> arguments produces a <codeph>STRING</codeph> 
result.
+        </li>
+      </ul>
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      The string functions operate mainly on these data types: <xref 
href="impala_string.xml#string"/>,
+      <xref href="impala_varchar.xml#varchar"/>, and <xref 
href="impala_char.xml#char"/>.
+    </p>
+
+    <p>
+      <b>Function reference:</b>
+    </p>
+
+    <p>
+      Impala supports the following string functions:
+    </p>
+
+    <dl>
+      <dlentry id="ascii">
+
+        <dt>
+          <codeph>ascii(string str)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">ascii() function</indexterm>
+          <b>Purpose:</b> Returns the numeric ASCII code of the first 
character of the argument.
+          <p>
+            <b>Return type:</b> <codeph>int</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="2.3.0" id="btrim">
+
+        <dt>
+          <codeph>btrim(string a)</codeph>,
+          <codeph>btrim(string a, string chars_to_trim)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">btrim() function</indexterm>
+          <b>Purpose:</b> Removes all instances of one or more characters
+          from the start and end of a <codeph>STRING</codeph> value.
+          By default, removes only spaces.
+          If a non-<codeph>NULL</codeph> optional second argument is 
specified, the function removes all
+          occurrences of characters in that second argument from the beginning 
and
+          end of the string.
+          <p><b>Return type:</b> <codeph>string</codeph></p>
+          <p conref="../shared/impala_common.xml#common/added_in_230"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p>
+            The following examples show the default <codeph>btrim()</codeph> 
behavior,
+            and what changes when you specify the optional second argument.
+            All the examples bracket the output value with <codeph>[ ]</codeph>
+            so that you can see any leading or trailing spaces in the 
<codeph>btrim()</codeph> result.
+            By default, the function removes and number of both leading and 
trailing spaces.
+            When the second argument is specified, any number of occurrences 
of any
+            character in the second argument are removed from the start and 
end of the
+            input string; in this case, spaces are not removed (unless they 
are part of the second
+            argument) and any instances of the characters are not removed if 
they do not come
+            right at the beginning or end of the string.
+          </p>
+<codeblock>-- Remove multiple spaces before and one space after.
+select concat('[',btrim('    hello '),']');
++---------------------------------------+
+| concat('[', btrim('    hello '), ']') |
++---------------------------------------+
+| [hello]                               |
++---------------------------------------+
+
+-- Remove any instances of x or y or z at beginning or end. Leave spaces alone.
+select concat('[',btrim('xy    hello zyzzxx','xyz'),']');
++------------------------------------------------------+
+| concat('[', btrim('xy    hello zyzzxx', 'xyz'), ']') |
++------------------------------------------------------+
+| [    hello ]                                         |
++------------------------------------------------------+
+
+-- Remove any instances of x or y or z at beginning or end.
+-- Leave x, y, z alone in the middle of the string.
+select concat('[',btrim('xyhelxyzlozyzzxx','xyz'),']');
++----------------------------------------------------+
+| concat('[', btrim('xyhelxyzlozyzzxx', 'xyz'), ']') |
++----------------------------------------------------+
+| [helxyzlo]                                         |
++----------------------------------------------------+
+</codeblock>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="1.3.0" id="char_length">
+
+        <dt>
+          <codeph>char_length(string a), <ph rev="1.3.0" 
id="character_length">character_length(string a)</ph></codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">char_length() function</indexterm>
+          <indexterm audience="Cloudera">character_length() 
function</indexterm>
+          <b>Purpose:</b> Returns the length in characters of the argument 
string. Aliases for the
+          <codeph>length()</codeph> function.
+          <p>
+            <b>Return type:</b> <codeph>int</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="2.3.0" id="chr">
+
+        <dt>
+          <codeph>chr(int character_code)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">chr() function</indexterm>
+          <b>Purpose:</b> Returns a character specified by a decimal code 
point value.
+          The interpretation and display of the resulting character depends on 
your system locale.
+          Because consistent processing of Impala string values is only 
guaranteed
+          for values within the ASCII range, only use this function for values
+          corresponding to ASCII characters.
+          In particular, parameter values greater than 255 return an empty 
string.
+          <p><b>Return type:</b> <codeph>string</codeph></p>
+          <p>
+            <b>Usage notes:</b> Can be used as the inverse of the 
<codeph>ascii()</codeph> function, which
+            converts a character to its numeric ASCII code.
+          </p>
+          <p conref="../shared/impala_common.xml#common/added_in_230"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+<codeblock>SELECT chr(65);
++---------+
+| chr(65) |
++---------+
+| A       |
++---------+
+
+SELECT chr(97);
++---------+
+| chr(97) |
++---------+
+| a       |
++---------+
+</codeblock>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="concat">
+
+        <dt>
+          <codeph>concat(string a, string b...)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">concat() function</indexterm>
+          <b>Purpose:</b> Returns a single string representing all the 
argument values joined together.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/concat_blurb"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="concat_ws">
+
+        <dt>
+          <codeph>concat_ws(string sep, string a, string b...)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">concat_ws() function</indexterm>
+          <b>Purpose:</b> Returns a single string representing the second and 
following argument values joined
+          together, delimited by a specified separator.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/concat_blurb"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="find_in_set">
+
+        <dt>
+          <codeph>find_in_set(string str, string strList)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">find_in_set() function</indexterm>
+          <b>Purpose:</b> Returns the position (starting from 1) of the first 
occurrence of a specified string
+          within a comma-separated string. Returns <codeph>NULL</codeph> if 
either argument is
+          <codeph>NULL</codeph>, 0 if the search string is not found, or 0 if 
the search string contains a comma.
+          <p>
+            <b>Return type:</b> <codeph>int</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="1.2" id="group_concat">
+
+        <dt>
+          <codeph>group_concat(string s [, string sep])</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">group_concat() function</indexterm>
+          <b>Purpose:</b> Returns a single string representing the argument 
value concatenated together for each
+          row of the result set. If the optional separator string is 
specified, the separator is added between each
+          pair of concatenated values.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/concat_blurb"/>
+          <p>
+            By default, returns a single string covering the whole result set. 
To include other columns or values
+            in the result set, or to produce multiple concatenated strings for 
subsets of rows, include a
+            <codeph>GROUP BY</codeph> clause in the query.
+          </p>
+          <p>
+            Strictly speaking, <codeph>group_concat()</codeph> is an aggregate 
function, not a scalar
+            function like the others in this list.
+            For additional details and examples, see <xref 
href="impala_group_concat.xml#group_concat"/>.
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="1.2" id="initcap">
+
+        <dt>
+          <codeph>initcap(string str)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">initcap() function</indexterm>
+          <b>Purpose:</b> Returns the input string with the first letter 
capitalized.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="instr">
+
+        <dt>
+          <codeph>instr(string str, string substr)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">instr() function</indexterm>
+          <b>Purpose:</b> Returns the position (starting from 1) of the first 
occurrence of a substring within a
+          longer string.
+          <p>
+            <b>Return type:</b> <codeph>int</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="length">
+
+        <dt>
+          <codeph>length(string a)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">length() function</indexterm>
+          <b>Purpose:</b> Returns the length in characters of the argument 
string.
+          <p>
+            <b>Return type:</b> <codeph>int</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="locate">
+
+        <dt>
+          <codeph>locate(string substr, string str[, int pos])</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">locate() function</indexterm>
+          <b>Purpose:</b> Returns the position (starting from 1) of the first 
occurrence of a substring within a
+          longer string, optionally after a particular position.
+          <p>
+            <b>Return type:</b> <codeph>int</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="lower">
+
+        <dt>
+          <codeph>lower(string a), <ph id="lcase">lcase(string a)</ph> 
</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">lower() function</indexterm>
+          <b>Purpose:</b> Returns the argument string converted to 
all-lowercase.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+          <p 
conref="../shared/impala_common.xml#common/case_insensitive_comparisons_tip"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="lpad">
+
+        <dt>
+          <codeph>lpad(string str, int len, string pad)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">lpad() function</indexterm>
+          <b>Purpose:</b> Returns a string of a specified length, based on the 
first argument string. If the
+          specified string is too short, it is padded on the left with a 
repeating sequence of the characters from
+          the pad string. If the specified string is too long, it is truncated 
on the right.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="ltrim">
+
+        <dt>
+          <codeph>ltrim(string a)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">ltrim() function</indexterm>
+          <b>Purpose:</b> Returns the argument string with any leading spaces 
removed from the left side.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="parse_url">
+
+        <dt>
+          <codeph>parse_url(string urlString, string partToExtract [, string 
keyToExtract])</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">parse_url() function</indexterm>
+          <b>Purpose:</b> Returns the portion of a URL corresponding to a 
specified part. The part argument can be
+          <codeph>'PROTOCOL'</codeph>, <codeph>'HOST'</codeph>, 
<codeph>'PATH'</codeph>, <codeph>'REF'</codeph>,
+          <codeph>'AUTHORITY'</codeph>, <codeph>'FILE'</codeph>, 
<codeph>'USERINFO'</codeph>, or
+          <codeph>'QUERY'</codeph>. Uppercase is required for these literal 
values. When requesting the
+          <codeph>QUERY</codeph> portion of the URL, you can optionally 
specify a key to retrieve just the
+          associated value from the key-value pairs in the query string.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p>
+            <b>Usage notes:</b> This function is important for the traditional 
Hadoop use case of interpreting web
+            logs. For example, if the web traffic data features raw URLs not 
divided into separate table columns,
+            you can count visitors to a particular page by extracting the 
<codeph>'PATH'</codeph> or
+            <codeph>'FILE'</codeph> field, or analyze search terms by 
extracting the corresponding key from the
+            <codeph>'QUERY'</codeph> field.
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="regexp_extract">
+
+        <dt>
+          <codeph>regexp_extract(string subject, string pattern, int 
index)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">regexp_extract() function</indexterm>
+          <b>Purpose:</b> Returns the specified () group from a string based 
on a regular expression pattern. Group
+          0 refers to the entire extracted string, while group 1, 2, and so on 
refers to the first, second, and so
+          on <codeph>(...)</codeph> portion.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/regexp_re2"/>
+          <p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
+          <p conref="../shared/impala_common.xml#common/regexp_escapes"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p>
+            This example shows how group 0 matches the full pattern string, 
including the portion outside any
+            <codeph>()</codeph> group:
+          </p>
+<codeblock>[localhost:21000] &gt; select 
regexp_extract('abcdef123ghi456jkl','.*?(\\d+)',0);
++------------------------------------------------------+
+| regexp_extract('abcdef123ghi456jkl', '.*?(\\d+)', 0) |
++------------------------------------------------------+
+| abcdef123ghi456                                      |
++------------------------------------------------------+
+Returned 1 row(s) in 0.11s</codeblock>
+          <p>
+            This example shows how group 1 matches just the contents inside 
the first <codeph>()</codeph> group in
+            the pattern string:
+          </p>
+<codeblock>[localhost:21000] &gt; select 
regexp_extract('abcdef123ghi456jkl','.*?(\\d+)',1);
++------------------------------------------------------+
+| regexp_extract('abcdef123ghi456jkl', '.*?(\\d+)', 1) |
++------------------------------------------------------+
+| 456                                                  |
++------------------------------------------------------+
+Returned 1 row(s) in 0.11s</codeblock>
+          <p rev="2.0.0">
+            Unlike in earlier Impala releases, the regular expression library 
used in Impala 2.0 and later supports
+            the <codeph>.*?</codeph> idiom for non-greedy matches. This 
example shows how a pattern string starting
+            with <codeph>.*?</codeph> matches the shortest possible portion of 
the source string, returning the
+            rightmost set of lowercase letters. A pattern string both starting 
and ending with <codeph>.*?</codeph>
+            finds two potential matches of equal length, and returns the first 
one found (the leftmost set of
+            lowercase letters).
+          </p>
+<codeblock>[localhost:21000] &gt; select 
regexp_extract('AbcdBCdefGHI','.*?([[:lower:]]+)',1);
++--------------------------------------------------------+
+| regexp_extract('abcdbcdefghi', '.*?([[:lower:]]+)', 1) |
++--------------------------------------------------------+
+| def                                                    |
++--------------------------------------------------------+
+[localhost:21000] &gt; select 
regexp_extract('AbcdBCdefGHI','.*?([[:lower:]]+).*?',1);
++-----------------------------------------------------------+
+| regexp_extract('abcdbcdefghi', '.*?([[:lower:]]+).*?', 1) |
++-----------------------------------------------------------+
+| bcd                                                       |
++-----------------------------------------------------------+
+</codeblock>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="2.3.0" id="regexp_like">
+
+        <dt>
+          <codeph>regexp_like(string source, string pattern[, string 
options])</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">regexp_like() function</indexterm>
+          <b>Purpose:</b> Returns <codeph>true</codeph> or 
<codeph>false</codeph> to indicate
+          whether the source string contains anywhere inside it the regular 
expression given by the pattern.
+          The optional third argument consists of letter flags that change how 
the match is performed,
+          such as <codeph>i</codeph> for case-insensitive matching.
+          <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+          <p>
+            The flags that you can include in the optional third argument are:
+          </p>
+          <ul>
+          <li>
+          <codeph>c</codeph>: Case-sensitive matching (the default).
+          </li>
+          <li>
+          <codeph>i</codeph>: Case-insensitive matching. If multiple instances 
of <codeph>c</codeph> and <codeph>i</codeph>
+          are included in the third argument, the last such option takes 
precedence.
+          </li>
+          <li>
+          <codeph>m</codeph>: Multi-line matching. The <codeph>^</codeph> and 
<codeph>$</codeph>
+          operators match the start or end of any line within the source 
string, not the
+          start and end of the entire string.
+          </li>
+          <li>
+          <codeph>n</codeph>: Newline matching. The <codeph>.</codeph> 
operator can match the
+          newline character. A repetition operator such as <codeph>.*</codeph> 
can
+          match a portion of the source string that spans multiple lines.
+          </li>
+          </ul>
+          <p>
+            <b>Return type:</b> <codeph>boolean</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/regexp_re2"/>
+          <p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
+          <p conref="../shared/impala_common.xml#common/regexp_escapes"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p>
+            This example shows how <codeph>regexp_like()</codeph> can test for 
the existence
+            of various kinds of regular expression patterns within a source 
string:
+          </p>
+<codeblock><![CDATA[
+-- Matches because the 'f' appears somewhere in 'foo'.
+select regexp_like('foo','f');
++-------------------------+
+| regexp_like('foo', 'f') |
++-------------------------+
+| true                    |
++-------------------------+
+
+-- Does not match because the comparison is case-sensitive by default.
+select regexp_like('foo','F');
++-------------------------+
+| regexp_like('foo', 'f') |
++-------------------------+
+| false                   |
++-------------------------+
+
+-- The 3rd argument can change the matching logic, such as 'i' meaning 
case-insensitive.
+select regexp_like('foo','F','i');
++------------------------------+
+| regexp_like('foo', 'f', 'i') |
++------------------------------+
+| true                         |
++------------------------------+
+
+-- The familiar regular expression notations work, such as ^ and $ anchors...
+select regexp_like('foo','f$');
++--------------------------+
+| regexp_like('foo', 'f$') |
++--------------------------+
+| false                    |
++--------------------------+
+
+select regexp_like('foo','o$');
++--------------------------+
+| regexp_like('foo', 'o$') |
++--------------------------+
+| true                     |
++--------------------------+
+
+-- ...and repetition operators such as * and +
+select regexp_like('foooooobar','fo+b');
++-----------------------------------+
+| regexp_like('foooooobar', 'fo+b') |
++-----------------------------------+
+| true                              |
++-----------------------------------+
+
+select regexp_like('foooooobar','fx*y*o*b');
++---------------------------------------+
+| regexp_like('foooooobar', 'fx*y*o*b') |
++---------------------------------------+
+| true                                  |
++---------------------------------------+
+]]>
+</codeblock>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="regexp_replace">
+
+        <dt>
+          <codeph>regexp_replace(string initial, string pattern, string 
replacement)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">regexp_replace() function</indexterm>
+          <b>Purpose:</b> Returns the initial argument with the regular 
expression pattern replaced by the final
+          argument string.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/regexp_re2"/>
+          <p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
+          <p conref="../shared/impala_common.xml#common/regexp_escapes"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p>
+            These examples show how you can replace parts of a string matching 
a pattern with replacement text,
+            which can include backreferences to any <codeph>()</codeph> groups 
in the pattern string. The
+            backreference numbers start at 1, and any <codeph>\</codeph> 
characters must be escaped as
+            <codeph>\\</codeph>.
+          </p>
+          <p>
+            Replace a character pattern with new text:
+          </p>
+<codeblock>[localhost:21000] &gt; select 
regexp_replace('aaabbbaaa','b+','xyz');
++------------------------------------------+
+| regexp_replace('aaabbbaaa', 'b+', 'xyz') |
++------------------------------------------+
+| aaaxyzaaa                                |
++------------------------------------------+
+Returned 1 row(s) in 0.11s</codeblock>
+          <p>
+            Replace a character pattern with substitution text that includes 
the original matching text:
+          </p>
+<codeblock>[localhost:21000] &gt; select 
regexp_replace('aaabbbaaa','(b+)','&lt;\\1&gt;');
++----------------------------------------------+
+| regexp_replace('aaabbbaaa', '(b+)', '&lt;\\1&gt;') |
++----------------------------------------------+
+| aaa&lt;bbb&gt;aaa                                  |
++----------------------------------------------+
+Returned 1 row(s) in 0.11s</codeblock>
+          <p>
+            Remove all characters that are not digits:
+          </p>
+<codeblock>[localhost:21000] &gt; select 
regexp_replace('123-456-789','[^[:digit:]]','');
++---------------------------------------------------+
+| regexp_replace('123-456-789', '[^[:digit:]]', '') |
++---------------------------------------------------+
+| 123456789                                         |
++---------------------------------------------------+
+Returned 1 row(s) in 0.12s</codeblock>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="repeat">
+
+        <dt>
+          <codeph>repeat(string str, int n)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">repeat() function</indexterm>
+          <b>Purpose:</b> Returns the argument string repeated a specified 
number of times.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="reverse">
+
+        <dt>
+          <codeph>reverse(string a)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">reverse() function</indexterm>
+          <b>Purpose:</b> Returns the argument string with characters in 
reversed order.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="rpad">
+
+        <dt>
+          <codeph>rpad(string str, int len, string pad)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">rpad() function</indexterm>
+          <b>Purpose:</b> Returns a string of a specified length, based on the 
first argument string. If the
+          specified string is too short, it is padded on the right with a 
repeating sequence of the characters from
+          the pad string. If the specified string is too long, it is truncated 
on the right.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="rtrim">
+
+        <dt>
+          <codeph>rtrim(string a)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">rtrim() function</indexterm>
+          <b>Purpose:</b> Returns the argument string with any trailing spaces 
removed from the right side.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="space">
+
+        <dt>
+          <codeph>space(int n)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">space() function</indexterm>
+          <b>Purpose:</b> Returns a concatenated string of the specified 
number of spaces. Shorthand for
+          <codeph>repeat(' ',<varname>n</varname>)</codeph>.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry rev="2.3.0 CDH-35915 IMPALA-2084" id="split_part">
+
+        <dt>
+          <codeph>split_part(string source, string delimiter, bigint 
n)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">split_part() function</indexterm>
+          <b>Purpose:</b> Returns the nth field within a delimited string.
+          The fields are numbered starting from 1.
+          The delimiter can consist of multiple characters, not just a
+          single character. All matching of the delimiter is done exactly, not 
using any
+          regular expression patterns.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/regexp_re2"/>
+          <p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
+          <p conref="../shared/impala_common.xml#common/regexp_escapes"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p>
+            These examples show how to retrieve the nth field from a delimited 
string:
+          </p>
+<codeblock><![CDATA[
+select split_part('x,y,z',',',1);
++-----------------------------+
+| split_part('x,y,z', ',', 1) |
++-----------------------------+
+| x                           |
++-----------------------------+
+
+select split_part('x,y,z',',',2);
++-----------------------------+
+| split_part('x,y,z', ',', 2) |
++-----------------------------+
+| y                           |
++-----------------------------+
+
+select split_part('x,y,z',',',3);
++-----------------------------+
+| split_part('x,y,z', ',', 3) |
++-----------------------------+
+| z                           |
++-----------------------------+
+</codeblock>
+
+          <p>
+            These examples show what happens for out-of-range field positions.
+            Specifying a value less than 1 produces an error. Specifying a 
value
+            greater than the number of fields returns a zero-length string
+            (which is not the same as <codeph>NULL</codeph>).
+          </p>
+<codeblock><![CDATA[
+select split_part('x,y,z',',',0);
+ERROR: Invalid field position: 0
+
+with t1 as (select split_part('x,y,z',',',4) nonexistent_field)
+  select
+      nonexistent_field
+    , concat('[',nonexistent_field,']')
+    , length(nonexistent_field);
+from t1
++-------------------+-------------------------------------+---------------------------+
+| nonexistent_field | concat('[', nonexistent_field, ']') | 
length(nonexistent_field) |
++-------------------+-------------------------------------+---------------------------+
+|                   | []                                  | 0                  
       |
++-------------------+-------------------------------------+---------------------------+
+</codeblock>
+
+          <p>
+            These examples show how the delimiter can be a multi-character 
value:
+          </p>
+<codeblock><![CDATA[
+select split_part('one***two***three','***',2);
++-------------------------------------------+
+| split_part('one***two***three', '***', 2) |
++-------------------------------------------+
+| two                                       |
++-------------------------------------------+
+
+select split_part('one\|/two\|/three','\|/',3);
++-------------------------------------------+
+| split_part('one\|/two\|/three', '\|/', 3) |
++-------------------------------------------+
+| three                                     |
++-------------------------------------------+
+]]>
+</codeblock>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="strleft">
+
+        <dt>
+          <codeph>strleft(string a, int num_chars)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">strleft() function</indexterm>
+          <b>Purpose:</b> Returns the leftmost characters of the string. 
Shorthand for a call to
+          <codeph>substr()</codeph> with 2 arguments.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="strright">
+
+        <dt>
+          <codeph>strright(string a, int num_chars)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">strright() function</indexterm>
+          <b>Purpose:</b> Returns the rightmost characters of the string. 
Shorthand for a call to
+          <codeph>substr()</codeph> with 2 arguments.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="substr">
+
+        <dt>
+          <codeph>substr(string a, int start [, int len]), <ph 
id="substring">substring(string a, int start [, int
+          len])</ph></codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">substr() function</indexterm>
+          <b>Purpose:</b> Returns the portion of the string starting at a 
specified point, optionally with a
+          specified maximum length. The characters in the string are indexed 
starting at 1.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="translate">
+
+        <dt>
+          <codeph>translate(string input, string from, string to)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">translate() function</indexterm>
+          <b>Purpose:</b> Returns the input string with a set of characters 
replaced by another set of characters.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="trim">
+
+        <dt>
+          <codeph>trim(string a)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">trim() function</indexterm>
+          <b>Purpose:</b> Returns the input string with both leading and 
trailing spaces removed. The same as
+          passing the string through both <codeph>ltrim()</codeph> and 
<codeph>rtrim()</codeph>.
+          <p>
+            <b>Usage notes:</b> Often used during data cleansing operations 
during the ETL cycle, if input values might still have surrounding spaces.
+            For a more general-purpose function that can remove other leading 
and trailing characters besides spaces, see <codeph>btrim()</codeph>.
+          </p>
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="upper">
+
+        <dt>
+          <codeph>upper(string a), <ph id="ucase">ucase(string a)</ph></codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="Cloudera">upper() function</indexterm>
+          <indexterm audience="Cloudera">ucase() function</indexterm>
+          <b>Purpose:</b> Returns the argument string converted to 
all-uppercase.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+          <p 
conref="../shared/impala_common.xml#common/case_insensitive_comparisons_tip"/>
+        </dd>
+
+      </dlentry>
+    </dl>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_struct.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_struct.xml b/docs/topics/impala_struct.xml
new file mode 100644
index 0000000..bddb1bb
--- /dev/null
+++ b/docs/topics/impala_struct.xml
@@ -0,0 +1,433 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="struct">
+
+  <title>STRUCT Complex Type (<keyword keyref="impala23"/> or higher 
only)</title>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      A complex data type, representing multiple fields of a single item. 
Frequently used as the element type of an <codeph>ARRAY</codeph>
+      or the <codeph>VALUE</codeph> part of a <codeph>MAP</codeph>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock><varname>column_name</varname> STRUCT &lt; <varname>name</varname> 
: <varname>type</varname> [COMMENT '<varname>comment_string</varname>'], ... 
&gt;
+
+type ::= <varname>primitive_type</varname> | <varname>complex_type</varname>
+</codeblock>
+
+    <p>
+      The names and number of fields within the <codeph>STRUCT</codeph> are 
fixed. Each field can be a different type. A field within a
+      <codeph>STRUCT</codeph> can also be another <codeph>STRUCT</codeph>, or 
an <codeph>ARRAY</codeph> or a <codeph>MAP</codeph>, allowing
+      you to create nested data structures with a maximum nesting depth of 100.
+    </p>
+
+    <p>
+      A <codeph>STRUCT</codeph> can be the top-level type for a column, or can 
itself be an item within an <codeph>ARRAY</codeph> or the
+      value part of the key-value pair in a <codeph>MAP</codeph>.
+    </p>
+
+    <p>
+      When a <codeph>STRUCT</codeph> is used as an <codeph>ARRAY</codeph> 
element or a <codeph>MAP</codeph> value, you use a join clause to
+      bring the <codeph>ARRAY</codeph> or <codeph>MAP</codeph> elements into 
the result set, and then refer to
+      
<codeph><varname>array_name</varname>.ITEM.<varname>field</varname></codeph> or
+      
<codeph><varname>map_name</varname>.VALUE.<varname>field</varname></codeph>. In 
the case of a <codeph>STRUCT</codeph> directly inside
+      an <codeph>ARRAY</codeph> or <codeph>MAP</codeph>, you can omit the 
<codeph>.ITEM</codeph> and <codeph>.VALUE</codeph> pseudocolumns
+      and refer directly to 
<codeph><varname>array_name</varname>.<varname>field</varname></codeph> or
+      <codeph><varname>map_name</varname>.<varname>field</varname></codeph>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_combo"/>
+
+    <p>
+      A <codeph>STRUCT</codeph> is similar conceptually to a table row: it 
contains a fixed number of named fields, each with a predefined
+      type. To combine two related tables, while using complex types to 
minimize repetition, the typical way to represent that data is as an
+      <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph> elements.
+    </p>
+
+    <p>
+      Because a <codeph>STRUCT</codeph> has a fixed number of named fields, it 
typically does not make sense to have a
+      <codeph>STRUCT</codeph> as the type of a table column. In such a case, 
you could just make each field of the <codeph>STRUCT</codeph>
+      into a separate column of the table. The <codeph>STRUCT</codeph> type is 
most useful as an item of an <codeph>ARRAY</codeph> or the
+      value part of the key-value pair in a <codeph>MAP</codeph>. A nested 
type column with a <codeph>STRUCT</codeph> at the lowest level
+      lets you associate a variable number of row-like objects with each row 
of the table.
+    </p>
+
+    <p>
+      The <codeph>STRUCT</codeph> type is straightforward to reference within 
a query. You do not need to include the
+      <codeph>STRUCT</codeph> column in a join clause or give it a table 
alias, as is required for the <codeph>ARRAY</codeph> and
+      <codeph>MAP</codeph> types. You refer to the individual fields using dot 
notation, such as
+      
<codeph><varname>struct_column_name</varname>.<varname>field_name</varname></codeph>,
 without any pseudocolumn such as
+      <codeph>ITEM</codeph> or <codeph>VALUE</codeph>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_describe"/>
+
+    <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+
+    <p>
+      Within the Parquet data file, the values for each 
<codeph>STRUCT</codeph> field are stored adjacent to each other, so that they 
can be
+      encoded and compressed using all the Parquet techniques for storing sets 
of similar or repeated values. The adjacency applies even
+      when the <codeph>STRUCT</codeph> values are part of an 
<codeph>ARRAY</codeph> or <codeph>MAP</codeph>. During a query, Impala avoids
+      unnecessary I/O by reading only the portions of the Parquet data file 
containing the requested <codeph>STRUCT</codeph> fields.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/added_in_230"/>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <ul conref="../shared/impala_common.xml#common/complex_types_restrictions">
+      <li/>
+    </ul>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <note 
conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/>
+
+    <p>
+      The following example shows a table with various kinds of 
<codeph>STRUCT</codeph> columns, both at the top level and nested within
+      other complex types. Practice the <codeph>CREATE TABLE</codeph> and 
query notation for complex type columns using empty tables, until
+      you can visualize a complex data structure and construct corresponding 
SQL statements reliably.
+    </p>
+
+<codeblock><![CDATA[CREATE TABLE struct_demo
+(
+  id BIGINT,
+  name STRING,
+
+-- A STRUCT as a top-level column. Demonstrates how the table ID column
+-- and the ID field within the STRUCT can coexist without a name conflict.
+  employee_info STRUCT < employer: STRING, id: BIGINT, address: STRING >,
+
+-- A STRUCT as the element type of an ARRAY.
+  places_lived ARRAY < STRUCT <street: STRING, city: STRING, country: STRING 
>>,
+
+-- A STRUCT as the value portion of the key-value pairs in a MAP.
+  memorable_moments MAP < STRING, STRUCT < year: INT, place: STRING, details: 
STRING >>,
+
+-- A STRUCT where one of the fields is another STRUCT.
+  current_address STRUCT < street_address: STRUCT <street_number: INT, 
street_name: STRING, street_type: STRING>, country: STRING, postal_code: STRING 
>
+)
+STORED AS PARQUET;
+]]>
+</codeblock>
+
+    <p>
+      The following example shows how to examine the structure of a table 
containing one or more <codeph>STRUCT</codeph> columns by using
+      the <codeph>DESCRIBE</codeph> statement. You can visualize each 
<codeph>STRUCT</codeph> as its own table, with columns named the same
+      as each field of the <codeph>STRUCT</codeph>. If the 
<codeph>STRUCT</codeph> is nested inside another complex type, such as
+      <codeph>ARRAY</codeph>, you can extend the qualified name passed to 
<codeph>DESCRIBE</codeph> until the output shows just the
+      <codeph>STRUCT</codeph> fields.
+    </p>
+
+<codeblock><![CDATA[DESCRIBE struct_demo;
++-------------------+--------------------------+
+| name              | type                     |
++-------------------+--------------------------+
+| id                | bigint                   |
+| name              | string                   |
+| employee_info     | struct<                  |
+|                   |   employer:string,       |
+|                   |   id:bigint,             |
+|                   |   address:string         |
+|                   | >                        |
+| places_lived      | array<struct<            |
+|                   |   street:string,         |
+|                   |   city:string,           |
+|                   |   country:string         |
+|                   | >>                       |
+| memorable_moments | map<string,struct<       |
+|                   |   year:int,              |
+|                   |   place:string,          |
+|                   |   details:string         |
+|                   | >>                       |
+| current_address   | struct<                  |
+|                   |   street_address:struct< |
+|                   |     street_number:int,   |
+|                   |     street_name:string,  |
+|                   |     street_type:string   |
+|                   |   >,                     |
+|                   |   country:string,        |
+|                   |   postal_code:string     |
+|                   | >                        |
++-------------------+--------------------------+
+]]>
+</codeblock>
+
+    <p>
+      The top-level column <codeph>EMPLOYEE_INFO</codeph> is a 
<codeph>STRUCT</codeph>. Describing
+      
<codeph><varname>table_name</varname>.<varname>struct_name</varname></codeph> 
displays the fields of the <codeph>STRUCT</codeph> as if
+      they were columns of a table:
+    </p>
+
+<codeblock><![CDATA[DESCRIBE struct_demo.employee_info;
++----------+--------+
+| name     | type   |
++----------+--------+
+| employer | string |
+| id       | bigint |
+| address  | string |
++----------+--------+
+]]>
+</codeblock>
+
+    <p>
+      Because <codeph>PLACES_LIVED</codeph> is a <codeph>STRUCT</codeph> 
inside an <codeph>ARRAY</codeph>, the initial
+      <codeph>DESCRIBE</codeph> shows the structure of the 
<codeph>ARRAY</codeph>:
+    </p>
+
+<codeblock><![CDATA[DESCRIBE struct_demo.places_lived;
++------+------------------+
+| name | type             |
++------+------------------+
+| item | struct<          |
+|      |   street:string, |
+|      |   city:string,   |
+|      |   country:string |
+|      | >                |
+| pos  | bigint           |
++------+------------------+
+]]>
+</codeblock>
+
+    <p>
+      Ask for the details of the <codeph>ITEM</codeph> field of the 
<codeph>ARRAY</codeph> to see just the layout of the
+      <codeph>STRUCT</codeph>:
+    </p>
+
+<codeblock><![CDATA[DESCRIBE struct_demo.places_lived.item;
++---------+--------+
+| name    | type   |
++---------+--------+
+| street  | string |
+| city    | string |
+| country | string |
++---------+--------+
+]]>
+</codeblock>
+
+    <p>
+      Likewise, <codeph>MEMORABLE_MOMENTS</codeph> has a 
<codeph>STRUCT</codeph> inside a <codeph>MAP</codeph>, which requires an extra
+      level of qualified name to see just the <codeph>STRUCT</codeph> part:
+    </p>
+
+<codeblock><![CDATA[DESCRIBE struct_demo.memorable_moments;
++-------+------------------+
+| name  | type             |
++-------+------------------+
+| key   | string           |
+| value | struct<          |
+|       |   year:int,      |
+|       |   place:string,  |
+|       |   details:string |
+|       | >                |
++-------+------------------+
+]]>
+</codeblock>
+
+    <p>
+      For a <codeph>MAP</codeph>, ask to see the <codeph>VALUE</codeph> field 
to see the corresponding <codeph>STRUCT</codeph> fields in a
+      table-like structure:
+    </p>
+
+<codeblock><![CDATA[DESCRIBE struct_demo.memorable_moments.value;
++---------+--------+
+| name    | type   |
++---------+--------+
+| year    | int    |
+| place   | string |
+| details | string |
++---------+--------+
+]]>
+</codeblock>
+
+    <p>
+      For a <codeph>STRUCT</codeph> inside a <codeph>STRUCT</codeph>, we can 
see the fields of the outer <codeph>STRUCT</codeph>:
+    </p>
+
+<codeblock><![CDATA[DESCRIBE struct_demo.current_address;
++----------------+-----------------------+
+| name           | type                  |
++----------------+-----------------------+
+| street_address | struct<               |
+|                |   street_number:int,  |
+|                |   street_name:string, |
+|                |   street_type:string  |
+|                | >                     |
+| country        | string                |
+| postal_code    | string                |
++----------------+-----------------------+
+]]>
+</codeblock>
+
+    <p>
+      Then we can use a further qualified name to see just the fields of the 
inner <codeph>STRUCT</codeph>:
+    </p>
+
+<codeblock><![CDATA[DESCRIBE struct_demo.current_address.street_address;
++---------------+--------+
+| name          | type   |
++---------------+--------+
+| street_number | int    |
+| street_name   | string |
+| street_type   | string |
++---------------+--------+
+]]>
+</codeblock>
+
+    <p>
+      The following example shows how to examine the structure of a table 
containing one or more <codeph>STRUCT</codeph> columns by using
+      the <codeph>DESCRIBE</codeph> statement. You can visualize each 
<codeph>STRUCT</codeph> as its own table, with columns named the same
+      as each field of the <codeph>STRUCT</codeph>. If the 
<codeph>STRUCT</codeph> is nested inside another complex type, such as
+      <codeph>ARRAY</codeph>, you can extend the qualified name passed to 
<codeph>DESCRIBE</codeph> until the output shows just the
+      <codeph>STRUCT</codeph> fields.
+    </p>
+
+<!-- To do: See why the most verbose query form gives an error. -->
+
+<codeblock><![CDATA[DESCRIBE struct_demo;
++-------------------+--------------------------+---------+
+| name              | type                     | comment |
++-------------------+--------------------------+---------+
+| id                | bigint                   |         |
+| name              | string                   |         |
+| employee_info     | struct<                  |         |
+|                   |   employer:string,       |         |
+|                   |   id:bigint,             |         |
+|                   |   address:string         |         |
+|                   | >                        |         |
+| places_lived      | array<struct<            |         |
+|                   |   street:string,         |         |
+|                   |   city:string,           |         |
+|                   |   country:string         |         |
+|                   | >>                       |         |
+| memorable_moments | map<string,struct<       |         |
+|                   |   year:int,              |         |
+|                   |   place:string,          |         |
+|                   |   details:string         |         |
+|                   | >>                       |         |
+| current_address   | struct<                  |         |
+|                   |   street_address:struct< |         |
+|                   |     street_number:int,   |         |
+|                   |     street_name:string,  |         |
+|                   |     street_type:string   |         |
+|                   |   >,                     |         |
+|                   |   country:string,        |         |
+|                   |   postal_code:string     |         |
+|                   | >                        |         |
++-------------------+--------------------------+---------+
+
+SELECT id, employee_info.id FROM struct_demo;
+
+SELECT id, employee_info.id AS employee_id FROM struct_demo;
+
+SELECT id, employee_info.id AS employee_id, employee_info.employer
+  FROM struct_demo;
+
+SELECT id, name, street, city, country
+  FROM struct_demo, struct_demo.places_lived;
+
+SELECT id, name, places_lived.pos, places_lived.street, places_lived.city, 
places_lived.country
+  FROM struct_demo, struct_demo.places_lived;
+
+SELECT id, name, pl.pos, pl.street, pl.city, pl.country
+  FROM struct_demo, struct_demo.places_lived AS pl;
+
+SELECT id, name, places_lived.pos, places_lived.street, places_lived.city, 
places_lived.country
+  FROM struct_demo, struct_demo.places_lived;
+
+SELECT id, name, pos, street, city, country
+  FROM struct_demo, struct_demo.places_lived;
+
+SELECT id, name, memorable_moments.key,
+  memorable_moments.value.year,
+  memorable_moments.value.place,
+  memorable_moments.value.details
+FROM struct_demo, struct_demo.memorable_moments
+WHERE memorable_moments.key IN ('Birthday','Anniversary','Graduation');
+
+SELECT id, name, mm.key, mm.value.year, mm.value.place, mm.value.details
+  FROM struct_demo, struct_demo.memorable_moments AS mm
+WHERE mm.key IN ('Birthday','Anniversary','Graduation');
+
+SELECT id, name, memorable_moments.key, memorable_moments.value.year,
+  memorable_moments.value.place, memorable_moments.value.details
+FROM struct_demo, struct_demo.memorable_moments
+WHERE key IN ('Birthday','Anniversary','Graduation');
+
+SELECT id, name, key, value.year, value.place, value.details
+  FROM struct_demo, struct_demo.memorable_moments
+WHERE key IN ('Birthday','Anniversary','Graduation');
+
+SELECT id, name, key, year, place, details
+  FROM struct_demo, struct_demo.memorable_moments
+WHERE key IN ('Birthday','Anniversary','Graduation');
+
+SELECT id, name,
+  current_address.street_address.street_number,
+  current_address.street_address.street_name,
+  current_address.street_address.street_type,
+  current_address.country,
+  current_address.postal_code
+FROM struct_demo;
+]]>
+</codeblock>
+
+    <p>
+      For example, this table uses a struct that encodes several data values 
for each phone number associated with a person. Each person can
+      have a variable-length array of associated phone numbers, and queries 
can refer to the category field to locate specific home, work,
+      mobile, and so on kinds of phone numbers.
+    </p>
+
+<codeblock><![CDATA[CREATE TABLE contact_info_many_structs
+(
+  id BIGINT, name STRING,
+  phone_numbers ARRAY < STRUCT <category:STRING, country_code:STRING, 
area_code:SMALLINT, full_number:STRING, mobile:BOOLEAN, carrier:STRING > >
+) STORED AS PARQUET;
+]]>
+</codeblock>
+
+    <p>
+      Because structs are naturally suited to composite values where the 
fields have different data types, you might use them to decompose
+      things such as addresses:
+    </p>
+
+<codeblock><![CDATA[CREATE TABLE contact_info_detailed_address
+(
+  id BIGINT, name STRING,
+  address STRUCT < house_number:INT, street:STRING, street_type:STRING, 
apartment:STRING, city:STRING, region:STRING, country:STRING >
+);
+]]>
+</codeblock>
+
+    <p>
+      In a big data context, splitting out data fields such as the number part 
of the address and the street name could let you do analysis
+      on each field independently. For example, which streets have the largest 
number range of addresses, what are the statistical
+      properties of the street names, which areas have a higher proportion of 
<q>Roads</q>, <q>Courts</q> or <q>Boulevards</q>, and so on.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_complex_types.xml#complex_types"/>, <xref 
href="impala_array.xml#array"/>,
+<!-- <xref href="impala_struct.xml#struct"/>, -->
+      <xref href="impala_map.xml#map"/>
+    </p>
+
+  </conbody>
+
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_subqueries.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_subqueries.xml 
b/docs/topics/impala_subqueries.xml
new file mode 100644
index 0000000..880f15c
--- /dev/null
+++ b/docs/topics/impala_subqueries.xml
@@ -0,0 +1,320 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="subqueries">
+
+  <title>Subqueries in Impala SELECT Statements</title>
+  <titlealts audience="PDF"><navtitle>Subqueries</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p rev="2.0.0">
+      <indexterm audience="Cloudera">subqueries</indexterm>
+      A <term>subquery</term> is a query that is nested within another query. 
Subqueries let queries on one table
+      dynamically adapt based on the contents of another table. This technique 
provides great flexibility and
+      expressive power for SQL queries.
+    </p>
+
+    <p>
+      A subquery can return a result set for use in the <codeph>FROM</codeph> 
or <codeph>WITH</codeph> clauses, or
+      with operators such as <codeph>IN</codeph> or <codeph>EXISTS</codeph>.
+    </p>
+
+    <p>
+      A <term>scalar subquery</term> produces a result set with a single row 
containing a single column, typically
+      produced by an aggregation function such as <codeph>MAX()</codeph> or 
<codeph>SUM()</codeph>. This single
+      result value can be substituted in scalar contexts such as arguments to 
comparison operators. If the result
+      set is empty, the value of the scalar subquery is <codeph>NULL</codeph>. 
For example, the following query
+      finds the maximum value of <codeph>T2.Y</codeph> and then substitutes 
that value into the
+      <codeph>WHERE</codeph> clause of the outer block that queries 
<codeph>T1</codeph>:
+    </p>
+
+<codeblock>SELECT x FROM t1 WHERE x &gt; (SELECT MAX(y) FROM t2);
+</codeblock>
+
+    <p>
+      <term>Uncorrelated subqueries</term> do not refer to any tables from the 
outer block of the query. The same
+      value or set of values produced by the subquery is used when evaluating 
each row from the outer query block.
+      In this example, the subquery returns an arbitrary number of values from 
<codeph>T2.Y</codeph>, and each
+      value of <codeph>T1.X</codeph> is tested for membership in that same set 
of values:
+    </p>
+
+<codeblock>SELECT x FROM t1 WHERE x IN (SELECT y FROM t2);
+</codeblock>
+
+    <p>
+      <term>Correlated subqueries</term> compare one or more values from the 
outer query block to values referenced
+      in the <codeph>WHERE</codeph> clause of the subquery. Each row evaluated 
by the outer <codeph>WHERE</codeph>
+      clause can be evaluated using a different set of values. These kinds of 
subqueries are restricted in the
+      kinds of comparisons they can do between columns of the inner and outer 
tables. (See the following
+      <b>Restrictions</b> item.)
+    </p>
+
+    <p>
+      For example, the following query finds all the employees with salaries 
that are higher than average for their
+      department. The subquery potentially computes a different 
<codeph>AVG()</codeph> value for each employee.
+    </p>
+
+<!-- TK: Construct an EMPLOYEES schema to try out examples like these. -->
+
+<codeblock>SELECT employee_name, employee_id FROM employees one WHERE
+  salary &gt; (SELECT avg(salary) FROM employees two WHERE one.dept_id = 
two.dept_id);
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+    <p>
+      <b>Subquery in the <codeph>FROM</codeph> clause:</b>
+    </p>
+
+<codeblock>SELECT <varname>select_list</varname> FROM 
<varname>table_ref</varname> [, <varname>table_ref</varname> ...]
+
+<varname>table_ref</varname> ::= <varname>table_name</varname> | 
(<varname>select_statement</varname>)
+</codeblock>
+
+    <p>
+      <b>Subqueries in <codeph>WHERE</codeph> clause:</b>
+    </p>
+
+<codeblock>WHERE <varname>value</varname> 
<varname>comparison_operator</varname> 
(<varname>scalar_select_statement</varname>)
+WHERE <varname>value</varname> [NOT] IN (<varname>select_statement</varname>)
+WHERE [NOT] EXISTS (<varname>correlated_select_statement</varname>)
+WHERE NOT EXISTS (<varname>correlated_select_statement</varname>)
+</codeblock>
+
+    <p>
+      <codeph>comparison_operator</codeph> is a numeric comparison such as 
<codeph>=</codeph>,
+      <codeph>&lt;=</codeph>, <codeph>!=</codeph>, and so on, or a string 
comparison operator such as
+      <codeph>LIKE</codeph> or <codeph>REGEXP</codeph>.
+    </p>
+
+    <p rev="2.0.0">
+      Although you can use non-equality comparison operators such as 
<codeph>&lt;</codeph> or
+      <codeph>&gt;=</codeph>, the subquery must include at least one equality 
comparison between the columns of the
+      inner and outer query blocks.
+    </p>
+
+    <p>
+      All syntax is available for both correlated and uncorrelated queries, 
except that the <codeph>NOT
+      EXISTS</codeph> clause cannot be used with an uncorrelated subquery.
+    </p>
+
+    <p>
+      Impala subqueries can be nested arbitrarily deep.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/sql1999"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      This example illustrates how subqueries can be used in the 
<codeph>FROM</codeph> clause to organize the table
+      names, column names, and column values by producing intermediate result 
sets, especially for join queries.
+    </p>
+
+<codeblock>SELECT avg(t1.x), max(t2.y) FROM
+  (SELECT id, cast(a AS DECIMAL(10,5)) AS x FROM raw_data WHERE a BETWEEN 0 
AND 100) AS t1
+  JOIN
+  (SELECT id, length(s) AS y FROM raw_data WHERE s LIKE 'A%') AS t2;
+  USING (id);
+</codeblock>
+
+    <p rev="2.0.0">
+      These examples show how a query can test for the existence of values in 
a separate table using the
+      <codeph>EXISTS()</codeph> operator with a subquery.
+<!--
+Internally, these queries are processed in a way similar to join queries.
+Because the values from the second table are not part of the result set, the 
subquery
+is more efficient than the equivalent join query.
+-->
+    </p>
+
+    <p>
+      The following examples show how a value can be compared against a set of 
values returned by a subquery.
+    </p>
+
+<codeblock rev="2.0.0">SELECT count(x) FROM t1 WHERE EXISTS(SELECT 1 FROM t2 
WHERE t1.x = t2.y * 10);
+
+SELECT x FROM t1 WHERE x IN (SELECT y FROM t2 WHERE state = 'CA');
+</codeblock>
+
+    <p>
+      The following examples demonstrate scalar subqueries. When a subquery is 
known to return a single value, you
+      can substitute it where you would normally put a constant value.
+    </p>
+
+<codeblock>SELECT x FROM t1 WHERE y = (SELECT max(z) FROM t2);
+SELECT x FROM t1 WHERE y &gt; (SELECT count(z) FROM t2);
+</codeblock>
+
+<!-- <p conref="../shared/impala_common.xml#common/partitioning_blurb"/> -->
+
+<!--
+<p conref="../shared/impala_common.xml#common/hbase_blurb"/>
+<p>
+Currently, the <codeph>IN (<varname>subquery</varname>)</codeph> operator 
results in a full table scan
+of an HBase table, rather than being translated into a series of single-row 
lookups.
+Therefore, this is not an efficient construct to use with Impala queries for 
HBase tables.
+</p>
+-->
+
+<!--
+<p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+<p conref="../shared/impala_common.xml#common/text_blurb"/>
+<p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+-->
+
+    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+    <p>
+      If the same table is referenced in both the outer and inner query 
blocks, construct a table alias in the
+      outer query block and use a fully qualified name to distinguish the 
inner and outer table references:
+    </p>
+
+<!-- TK: verify the logic of this example. Probably have other similar ones 
that could be reused here. -->
+
+<codeblock>SELECT * FROM t1 one WHERE id IN (SELECT parent FROM t1 two WHERE 
t1.parent = t2.id);
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+
+    <p>
+      Internally, subqueries involving <codeph>IN</codeph>, <codeph>NOT 
IN</codeph>, <codeph>EXISTS</codeph>, or
+      <codeph>NOT EXISTS</codeph> clauses are rewritten into join queries. 
Depending on the syntax, the subquery
+      might be rewritten to an outer join, semi join, cross join, or anti join.
+    </p>
+
+    <p>
+      A query is processed differently depending on whether the subquery calls 
any aggregation functions. There are
+      correlated and uncorrelated forms, with and without calls to aggregation 
functions. Each of these four
+      categories is rewritten differently.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/column_stats_blurb"/>
+
+    <p>
+      Because queries that include correlated and uncorrelated subqueries in 
the <codeph>WHERE</codeph> clause are
+      written into join queries, to achieve best performance, follow the same 
guidelines for running the
+      <codeph>COMPUTE STATS</codeph> statement as you do for tables involved 
in regular join queries. Run the
+      <codeph>COMPUTE STATS</codeph> statement for each associated tables 
after loading or substantially changing
+      the data in that table. See <xref 
href="impala_perf_stats.xml#perf_stats"/> for details.
+    </p>
+
+    <p>
+      <b>Added in:</b> Subqueries are substantially enhanced starting in 
Impala 2.0 for CDH 4, and CDH 5.2.0. Now,
+      they can be used in the <codeph>WHERE</codeph> clause, in combination 
with clauses such as
+      <codeph>EXISTS</codeph> and <codeph>IN</codeph>, rather than just in the 
<codeph>FROM</codeph> clause.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <p>
+      The initial Impala support for nested subqueries addresses the most 
common use cases. Some restrictions
+      remain:
+    </p>
+
+    <ul>
+      <li>
+        <p>
+          Although you can use subqueries in a query involving 
<codeph>UNION</codeph> or <codeph>UNION ALL</codeph>
+          in Impala 2.1.0 and higher, currently you cannot construct a union 
of two subqueries (for example, in the
+          argument of an <codeph>IN</codeph> or <codeph>EXISTS</codeph> 
operator).
+        </p>
+      </li>
+
+      <li>
+        <p>
+          Subqueries returning scalar values cannot be used with the operators 
<codeph>ANY</codeph> or
+          <codeph>ALL</codeph>. (Impala does not currently have a 
<codeph>SOME</codeph> operator, but if it did,
+          the same restriction would apply.)
+        </p>
+      </li>
+
+      <li>
+        <p>
+          For the <codeph>EXISTS</codeph> and <codeph>NOT EXISTS</codeph> 
clauses, any subquery comparing values
+          from the outer query block to another table must use at least one 
equality comparison, not exclusively
+          other kinds of comparisons such as less than, greater than, 
<codeph>BETWEEN</codeph>, or
+          <codeph>!=</codeph>.
+        </p>
+      </li>
+
+      <li>
+<!-- TK: think this is no longer true. -->
+        <p>
+          Currently, a scalar subquery cannot be used as the first or second 
argument to the
+          <codeph>BETWEEN</codeph> operator.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          A subquery cannot be used inside an <codeph>OR</codeph> conjunction. 
Expressions inside a subquery, for
+          example in the <codeph>WHERE</codeph> clause, can use 
<codeph>OR</codeph> conjunctions; the restriction
+          only applies to parts of the query <q>above</q> the subquery.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          Scalar subqueries are only supported in numeric contexts. You cannot 
use a scalar subquery as an argument
+          to the <codeph>LIKE</codeph>, <codeph>REGEXP</codeph>, or 
<codeph>RLIKE</codeph> operators, or compare it
+          to a value of a non-numeric type such as <codeph>TIMESTAMP</codeph> 
or <codeph>BOOLEAN</codeph>.
+        </p>
+      </li>
+
+      <li>
+        <p>
+<!-- A subquery cannot be used to generate a scalar value for a function call. 
-->
+          You cannot use subqueries with the <codeph>CASE</codeph> function to 
generate the comparison value, the
+          values to be compared against, or the return value.
+        </p>
+      </li>
+
+      <li>
+        <p>
+          A subquery is not allowed in the filter condition for the 
<codeph>HAVING</codeph> clause. (Strictly
+          speaking, a subquery cannot appear anywhere outside the 
<codeph>WITH</codeph>, <codeph>FROM</codeph>, and
+          <codeph>WHERE</codeph> clauses.)
+        </p>
+      </li>
+
+      <li>
+        <p>
+          You must use a fully qualified name
+          
(<codeph><varname>table_name</varname>.<varname>column_name</varname></codeph> 
or
+          
<codeph><varname>database_name</varname>.<varname>table_name</varname>.<varname>column_name</varname></codeph>)
+          when referring to any column from the outer query block within a 
subquery.
+        </p>
+      </li>
+    </ul>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+    <p rev="2.3.0">
+      For the complex types (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, 
and <codeph>MAP</codeph>)
+      available in <keyword keyref="impala23_full"/> and higher, the join 
queries that <q>unpack</q> complex type
+      columns often use correlated subqueries in the <codeph>FROM</codeph> 
clause.
+      For example, if the first table in the join clause is 
<codeph>CUSTOMER</codeph>, the second
+      join clause might have a subquery that selects from the column 
<codeph>CUSTOMER.C_ORDERS</codeph>,
+      which is an <codeph>ARRAY</codeph>. The subquery re-evaluates the 
<codeph>ARRAY</codeph> elements
+      corresponding to each row from the <codeph>CUSTOMER</codeph> table.
+      See <xref href="impala_complex_types.xml#complex_types"/> for details 
and examples of
+      using subqueries with complex types.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_operators.xml#exists"/>, <xref 
href="impala_operators.xml#in"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_sum.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_sum.xml b/docs/topics/impala_sum.xml
new file mode 100644
index 0000000..cb9c453
--- /dev/null
+++ b/docs/topics/impala_sum.xml
@@ -0,0 +1,239 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD 
DITA Concept//EN" "concept.dtd">
+<concept id="sum">
+
+  <title>SUM Function</title>
+  <titlealts audience="PDF"><navtitle>SUM</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="Analytic Functions"/>
+      <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">sum() function</indexterm>
+      An aggregate function that returns the sum of a set of numbers. Its 
single argument can be numeric column, or
+      the numeric result of a function or expression applied to the column 
value. Rows with a <codeph>NULL</codeph>
+      value for the specified column are ignored. If the table is empty, or 
all the values supplied to
+      <codeph>MIN</codeph> are <codeph>NULL</codeph>, <codeph>SUM</codeph> 
returns <codeph>NULL</codeph>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SUM([DISTINCT | ALL] <varname>expression</varname>) [OVER 
(<varname>analytic_clause</varname>)]</codeblock>
+
+    <p>
+      When the query contains a <codeph>GROUP BY</codeph> clause, returns one 
value for each combination of
+      grouping values.
+    </p>
+
+    <p>
+      <b>Return type:</b> <codeph>BIGINT</codeph> for integer arguments, 
<codeph>DOUBLE</codeph> for floating-point
+      arguments
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+    <p 
conref="../shared/impala_common.xml#common/complex_types_aggregation_explanation"/>
+
+    <p 
conref="../shared/impala_common.xml#common/complex_types_aggregation_example"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      The following example shows how to use <codeph>SUM()</codeph> to compute 
the total for all the values in the
+      table, a subset of values, or the sum for each combination of values in 
the <codeph>GROUP BY</codeph> clause:
+    </p>
+
+<codeblock>-- Total all the values for this column in the table.
+select sum(c1) from t1;
+-- Find the total for this column from a subset of the table.
+select sum(c1) from t1 where month = 'January' and year = '2013';
+-- Find the total from a set of numeric function results.
+select sum(length(s)) from t1;
+-- Often used with functions that return predefined values to compute a score.
+select sum(case when grade = 'A' then 1.0 when grade = 'B' then 0.75 else 0) 
as class_honors from test_scores;
+-- Can also be used in combination with DISTINCT and/or GROUP BY.
+-- Return more than one result.
+select month, year, sum(purchase_price) from store_stats group by month, year;
+-- Filter the input to eliminate duplicates before performing the calculation.
+select sum(distinct x) from t1;
+</codeblock>
+
+    <p rev="2.0.0">
+      The following examples show how to use <codeph>SUM()</codeph> in an 
analytic context. They use a table
+      containing integers from 1 to 10. Notice how the <codeph>SUM()</codeph> 
is reported for each input value, as
+      opposed to the <codeph>GROUP BY</codeph> clause which condenses the 
result set.
+<codeblock>select x, property, sum(x) <b>over (partition by property)</b> as 
sum from int_t where property in ('odd','even');
++----+----------+-----+
+| x  | property | sum |
++----+----------+-----+
+| 2  | even     | 30  |
+| 4  | even     | 30  |
+| 6  | even     | 30  |
+| 8  | even     | 30  |
+| 10 | even     | 30  |
+| 1  | odd      | 25  |
+| 3  | odd      | 25  |
+| 5  | odd      | 25  |
+| 7  | odd      | 25  |
+| 9  | odd      | 25  |
++----+----------+-----+
+</codeblock>
+
+Adding an <codeph>ORDER BY</codeph> clause lets you experiment with results 
that are cumulative or apply to a moving
+set of rows (the <q>window</q>). The following examples use 
<codeph>SUM()</codeph> in an analytic context
+(that is, with an <codeph>OVER()</codeph> clause) to produce a running total 
of all the even values,
+then a running total of all the odd values. The basic <codeph>ORDER BY 
x</codeph> clause implicitly
+activates a window clause of <codeph>RANGE BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW</codeph>,
+which is effectively the same as <codeph>ROWS BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW</codeph>,
+therefore all of these examples produce the same results:
+<codeblock>select x, property,
+  sum(x) over (partition by property <b>order by x</b>) as 'cumulative total'
+  from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative total |
++----+----------+------------------+
+| 2  | even     | 2                |
+| 4  | even     | 6                |
+| 6  | even     | 12               |
+| 8  | even     | 20               |
+| 10 | even     | 30               |
+| 1  | odd      | 1                |
+| 3  | odd      | 4                |
+| 5  | odd      | 9                |
+| 7  | odd      | 16               |
+| 9  | odd      | 25               |
++----+----------+------------------+
+
+select x, property,
+  sum(x) over
+  (
+    partition by property
+    <b>order by x</b>
+    <b>range between unbounded preceding and current row</b>
+  ) as 'cumulative total'
+from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative total |
++----+----------+------------------+
+| 2  | even     | 2                |
+| 4  | even     | 6                |
+| 6  | even     | 12               |
+| 8  | even     | 20               |
+| 10 | even     | 30               |
+| 1  | odd      | 1                |
+| 3  | odd      | 4                |
+| 5  | odd      | 9                |
+| 7  | odd      | 16               |
+| 9  | odd      | 25               |
++----+----------+------------------+
+
+select x, property,
+  sum(x) over
+  (
+    partition by property
+    <b>order by x</b>
+    <b>rows between unbounded preceding and current row</b>
+  ) as 'cumulative total'
+  from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative total |
++----+----------+------------------+
+| 2  | even     | 2                |
+| 4  | even     | 6                |
+| 6  | even     | 12               |
+| 8  | even     | 20               |
+| 10 | even     | 30               |
+| 1  | odd      | 1                |
+| 3  | odd      | 4                |
+| 5  | odd      | 9                |
+| 7  | odd      | 16               |
+| 9  | odd      | 25               |
++----+----------+------------------+
+</codeblock>
+
+Changing the direction of the <codeph>ORDER BY</codeph> clause causes the 
intermediate
+results of the cumulative total to be calculated in a different order:
+
+<codeblock>select sum(x) over (partition by property <b>order by x desc</b>) 
as 'cumulative total'
+  from int_t where property in ('odd','even');
++----+----------+------------------+
+| x  | property | cumulative total |
++----+----------+------------------+
+| 10 | even     | 10               |
+| 8  | even     | 18               |
+| 6  | even     | 24               |
+| 4  | even     | 28               |
+| 2  | even     | 30               |
+| 9  | odd      | 9                |
+| 7  | odd      | 16               |
+| 5  | odd      | 21               |
+| 3  | odd      | 24               |
+| 1  | odd      | 25               |
++----+----------+------------------+
+</codeblock>
+
+The following examples show how to construct a moving window, with a running 
total taking into account 1 row before
+and 1 row after the current row, within the same partition (all the even 
values or all the odd values).
+Because of a restriction in the Impala <codeph>RANGE</codeph> syntax, this 
type of
+moving window is possible with the <codeph>ROWS BETWEEN</codeph> clause but 
not the <codeph>RANGE BETWEEN</codeph>
+clause:
+<codeblock>select x, property,
+  sum(x) over
+  (
+    partition by property
+    <b>order by x</b>
+    <b>rows between 1 preceding and 1 following</b>
+  ) as 'moving total'
+  from int_t where property in ('odd','even');
++----+----------+--------------+
+| x  | property | moving total |
++----+----------+--------------+
+| 2  | even     | 6            |
+| 4  | even     | 12           |
+| 6  | even     | 18           |
+| 8  | even     | 24           |
+| 10 | even     | 18           |
+| 1  | odd      | 4            |
+| 3  | odd      | 9            |
+| 5  | odd      | 15           |
+| 7  | odd      | 21           |
+| 9  | odd      | 16           |
++----+----------+--------------+
+
+-- Doesn't work because of syntax restriction on RANGE clause.
+select x, property,
+  sum(x) over
+  (
+    partition by property
+    <b>order by x</b>
+    <b>range between 1 preceding and 1 following</b>
+  ) as 'moving total'
+from int_t where property in ('odd','even');
+ERROR: AnalysisException: RANGE is only supported with both the lower and 
upper bounds UNBOUNDED or one UNBOUNDED and the other CURRENT ROW.
+</codeblock>
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. -->
+
+    <p conref="../shared/impala_common.xml#common/sum_double"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_analytic_functions.xml#analytic_functions"/>
+    </p>
+
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_support_start_over.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_support_start_over.xml 
b/docs/topics/impala_support_start_over.xml
new file mode 100644
index 0000000..906efaa
--- /dev/null
+++ b/docs/topics/impala_support_start_over.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="support_start_over">
+
+  <title>SUPPORT_START_OVER Query Option</title>
+  <titlealts audience="PDF"><navtitle>SUPPORT_START_OVER</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">SUPPORT_START_OVER query 
option</indexterm>
+      Leave this setting at its default value.
+      It is a read-only setting, tested by some client applications such as 
Hue.
+    </p>
+    <p>
+      If you accidentally change it through <cmdname>impala-shell</cmdname>,
+      subsequent queries encounter errors until you undo the change
+      by issuing <codeph>UNSET support_start_over</codeph>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/type_boolean"/>
+    <p conref="../shared/impala_common.xml#common/default_false"/>
+  </conbody>
+</concept>

[05/51] [partial] incubator-impala git commit: IMPALA-3398: Add docs to main Impala branch.

Reply via email to