[27/51] [partial] incubator-impala git commit: IMPALA-3398: Add docs to main Impala branch.

jbapple Thu, 17 Nov 2016 15:12:43 -0800

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_float.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_float.xml b/docs/topics/impala_float.xml
new file mode 100644
index 0000000..8ef1144
--- /dev/null
+++ b/docs/topics/impala_float.xml
@@ -0,0 +1,94 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="float">
+
+  <title>FLOAT Data Type</title>
+  <titlealts audience="PDF"><navtitle>FLOAT</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      A single precision floating-point data type used in <codeph>CREATE 
TABLE</codeph> and <codeph>ALTER
+      TABLE</codeph> statements.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+    <p>
+      In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+    </p>
+
+<codeblock><varname>column_name</varname> FLOAT</codeblock>
+
+    <p>
+      <b>Range:</b> 1.40129846432481707e-45 .. 3.40282346638528860e+38, 
positive or negative
+    </p>
+
+    <p>
+      <b>Precision:</b> 6 to 9 significant digits, depending on usage. The 
number of significant digits does
+      not depend on the position of the decimal point.
+    </p>
+
+    <p>
+      <b>Representation:</b> The values are stored in 4 bytes, using
+      <xref 
href="https://en.wikipedia.org/wiki/Single-precision_floating-point_format" 
scope="external" format="html">IEEE 754 Single Precision Binary Floating 
Point</xref> format.
+    </p>
+
+    <p>
+      <b>Conversions:</b> Impala automatically converts <codeph>FLOAT</codeph> 
to more precise
+      <codeph>DOUBLE</codeph> values, but not the other way around. You can 
use <codeph>CAST()</codeph> to convert
+      <codeph>FLOAT</codeph> values to <codeph>TINYINT</codeph>, 
<codeph>SMALLINT</codeph>, <codeph>INT</codeph>,
+      <codeph>BIGINT</codeph>, <codeph>STRING</codeph>, 
<codeph>TIMESTAMP</codeph>, or <codeph>BOOLEAN</codeph>.
+      You can use exponential notation in <codeph>FLOAT</codeph> literals or 
when casting from
+      <codeph>STRING</codeph>, for example <codeph>1.0e6</codeph> to represent 
one million.
+      <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>CREATE TABLE t1 (x FLOAT);
+SELECT CAST(1000.5 AS FLOAT);
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/partitioning_imprecise"/>
+
+    <p conref="../shared/impala_common.xml#common/hbase_ok"/>
+
+    <p conref="../shared/impala_common.xml#common/parquet_ok"/>
+
+    <p conref="../shared/impala_common.xml#common/text_bulky"/>
+
+<!-- <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> -->
+
+    <p conref="../shared/impala_common.xml#common/internals_4_bytes"/>
+
+<!-- <p conref="../shared/impala_common.xml#common/added_in_20"/> -->
+
+    <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+<!-- This conref appears under SUM(), AVG(), FLOAT, and DOUBLE topics. -->
+
+    <p conref="../shared/impala_common.xml#common/sum_double"/>
+
+    <p 
conref="../shared/impala_common.xml#common/float_double_decimal_caveat"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_literals.xml#numeric_literals"/>, <xref 
href="impala_math_functions.xml#math_functions"/>,
+      <xref href="impala_double.xml#double"/>
+    </p>
+  </conbody>
+</concept>


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_functions.xml b/docs/topics/impala_functions.xml
new file mode 100644
index 0000000..55a36dc
--- /dev/null
+++ b/docs/topics/impala_functions.xml
@@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="builtins">
+
+  <title id="title_functions">Impala Built-In Functions</title>
+  <titlealts audience="PDF"><navtitle>Built-In Functions</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <!-- To do:
+      Opportunity to conref some material between here and the "Functions" 
topic under "Schema Objects".
+    -->
+
+    <p>
+      Impala supports several categories of built-in functions. These 
functions let you perform mathematical
+      calculations, string manipulation, date calculations, and other kinds of 
data transformations directly in
+      <codeph>SELECT</codeph> statements. The built-in functions let a SQL 
query return results with all
+      formatting, calculating, and type conversions applied, rather than 
performing time-consuming postprocessing
+      in another application. By applying function calls where practical, you 
can make a SQL query that is as
+      convenient as an expression in a procedural programming language or a 
formula in a spreadsheet.
+    </p>
+
+    <p>
+      The categories of functions supported by Impala are:
+    </p>
+
+    <ul>
+      <li>
+        <xref href="impala_math_functions.xml#math_functions"/>
+      </li>
+
+      <li>
+        <xref href="impala_conversion_functions.xml#conversion_functions"/>
+      </li>
+
+      <li>
+        <xref href="impala_datetime_functions.xml#datetime_functions"/>
+      </li>
+
+      <li>
+        <xref href="impala_conditional_functions.xml#conditional_functions"/>
+      </li>
+
+      <li>
+        <xref href="impala_string_functions.xml#string_functions"/>
+      </li>
+
+      <li>
+        Aggregate functions, explained in <xref 
href="impala_aggregate_functions.xml#aggregate_functions"/>.
+      </li>
+    </ul>
+
+    <p>
+      You call any of these functions through the <codeph>SELECT</codeph> 
statement. For most functions, you can
+      omit the <codeph>FROM</codeph> clause and supply literal values for any 
required arguments:
+    </p>
+
+<codeblock>select abs(-1);
++---------+
+| abs(-1) |
++---------+
+| 1       |
++---------+
+
+select concat('The rain ', 'in Spain');
++---------------------------------+
+| concat('the rain ', 'in spain') |
++---------------------------------+
+| The rain in Spain               |
++---------------------------------+
+
+select power(2,5);
++-------------+
+| power(2, 5) |
++-------------+
+| 32          |
++-------------+
+</codeblock>
+
+    <p>
+      When you use a <codeph>FROM</codeph> clause and specify a column name as 
a function argument, the function is
+      applied for each item in the result set:
+    </p>
+
+<!-- TK: make real output for these; change the queries if necessary to use 
tables I already have. -->
+
+<codeblock>select concat('Country = ',country_code) from all_countries where 
population &gt; 100000000;
+select round(price) as dollar_value from product_catalog where price between 
0.0 and 100.0;
+</codeblock>
+
+    <p>
+      Typically, if any argument to a built-in function is 
<codeph>NULL</codeph>, the result value is also
+      <codeph>NULL</codeph>:
+    </p>
+
+<codeblock>select cos(null);
++-----------+
+| cos(null) |
++-----------+
+| NULL      |
++-----------+
+
+select power(2,null);
++----------------+
+| power(2, null) |
++----------------+
+| NULL           |
++----------------+
+
+select concat('a',null,'b');
++------------------------+
+| concat('a', null, 'b') |
++------------------------+
+| NULL                   |
++------------------------+
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/aggr1"/>
+
+<codeblock conref="../shared/impala_common.xml#common/aggr2"/>
+
+    <p conref="../shared/impala_common.xml#common/aggr3"/>
+
+    <p>
+      Aggregate functions are a special category with different rules. These 
functions calculate a return value
+      across all the items in a result set, so they do require a 
<codeph>FROM</codeph> clause in the query:
+    </p>
+
+<!-- TK: make real output for these; change the queries if necessary to use 
tables I already have. -->
+
+<codeblock>select count(product_id) from product_catalog;
+select max(height), avg(height) from census_data where age &gt; 20;
+</codeblock>
+
+    <p>
+      Aggregate functions also ignore <codeph>NULL</codeph> values rather than 
returning a <codeph>NULL</codeph>
+      result. For example, if some rows have <codeph>NULL</codeph> for a 
particular column, those rows are ignored
+      when computing the <codeph>AVG()</codeph> for that column. Likewise, specifying 
<codeph>COUNT(col_name)</codeph> in a query
+      counts only those rows where <codeph>col_name</codeph> contains a 
non-<codeph>NULL</codeph> value.
+    </p>
+
+    <p rev="2.0.0">
+      Analytic functions are a variation on aggregate functions. Instead of 
returning a single value, or an
+      identical value for each group of rows, they can compute values that 
vary based on a <q>window</q> consisting
+      of other rows around them in the result set.
+    </p>
+
+    <p outputclass="toc"/>
+
+  </conbody>
+
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_functions_overview.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_functions_overview.xml 
b/docs/topics/impala_functions_overview.xml
new file mode 100644
index 0000000..0e3973b
--- /dev/null
+++ b/docs/topics/impala_functions_overview.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE concept PUBLIC "-//OASIS//DTD 
DITA Concept//EN" "concept.dtd">
+<concept id="functions">
+
+  <title>Overview of Impala Functions</title>
+  <titlealts audience="PDF"><navtitle>Functions</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Functions let you apply arithmetic, string, or other computations and 
transformations to Impala data. You
+      typically use them in <codeph>SELECT</codeph> lists and 
<codeph>WHERE</codeph> clauses to filter and format
+      query results so that the result set is exactly what you want, with no 
further processing needed on the
+      application side.
+    </p>
+
+    <p>
+      Scalar functions return a single result for each input row. See <xref 
href="impala_functions.xml#builtins"/>.
+    </p>
+
+<codeblock>[localhost:21000] > select name, population from country where 
continent = 'North America' order by population desc limit 4;
+[localhost:21000] > select upper(name), population from country where 
continent = 'North America' order by population desc limit 4;
++-------------+------------+
+| upper(name) | population |
++-------------+------------+
+| USA         | 320000000  |
+| MEXICO      | 122000000  |
+| CANADA      | 25000000   |
+| GUATEMALA   | 16000000   |
++-------------+------------+
+</codeblock>
+    <p>
+      Aggregate functions combine the results from multiple rows:
+      either a single result for the entire table, or a separate result for 
each group of rows.
+      Aggregate functions are frequently used in combination with 
<codeph>GROUP BY</codeph>
+      and <codeph>HAVING</codeph> clauses in the <codeph>SELECT</codeph> 
statement.
+      See <xref href="impala_aggregate_functions.xml#aggregate_functions"/>.
+    </p>
+
+<codeblock>[localhost:21000] > select continent, <b>sum(population)</b> as 
howmany from country <b>group by continent</b> order by howmany desc;
++---------------+------------+
+| continent     | howmany    |
++---------------+------------+
+| Asia          | 4298723000 |
+| Africa        | 1110635000 |
+| Europe        | 742452000  |
+| North America | 565265000  |
+| South America | 406740000  |
+| Oceania       | 38304000   |
++---------------+------------+
+</codeblock>
+
+    <p>
+      User-defined functions (UDFs) let you code your own logic.  They can be 
either scalar or aggregate functions.
+      UDFs let you implement important business or scientific logic using 
high-performance code for Impala to automatically parallelize.
+      You can also use UDFs to implement convenience functions to simplify 
reporting or porting SQL from other database systems.
+      See <xref href="impala_udf.xml#udfs"/>.
+    </p>
+
+<codeblock>[localhost:21000] > select <b>rot13('Hello world!')</b> as 'Weak 
obfuscation';
++------------------+
+| weak obfuscation |
++------------------+
+| Uryyb jbeyq!     |
++------------------+
+[localhost:21000] > select <b>likelihood_of_new_subatomic_particle(sensor1, 
sensor2, sensor3)</b> as probability
+                  > from experimental_results group by experiment;
+</codeblock>
+
+    <p>
+      Each function is associated with a specific database. For example, if 
you issue a <codeph>USE somedb</codeph>
+      statement followed by <codeph>CREATE FUNCTION somefunc</codeph>, the new 
function is created in the
+      <codeph>somedb</codeph> database, and you could refer to it through the 
fully qualified name
+      <codeph>somedb.somefunc</codeph>. You could then issue another 
<codeph>USE</codeph> statement
+      and create a function with the same name in a different database.
+    </p>
+
+    <p>
+      Impala built-in functions are associated with a special database named 
<codeph>_impala_builtins</codeph>,
+      which lets you refer to them from any database without qualifying the 
name.
+    </p>
+
+<codeblock>[localhost:21000] > show databases;
++-------------------------+
+| name                    |
++-------------------------+
+| <b>_impala_builtins</b>        |
+| analytic_functions      |
+| avro_testing            |
+| data_file_size          |
+...
+[localhost:21000] > show functions in _impala_builtins like '*subs*';
++-------------+-----------------------------------+
+| return type | signature                         |
++-------------+-----------------------------------+
+| STRING      | substr(STRING, BIGINT)            |
+| STRING      | substr(STRING, BIGINT, BIGINT)    |
+| STRING      | substring(STRING, BIGINT)         |
+| STRING      | substring(STRING, BIGINT, BIGINT) |
++-------------+-----------------------------------+
+</codeblock>
+
+    <p>
+      <b>Related statements:</b> <xref 
href="impala_create_function.xml#create_function"/>,
+      <xref href="impala_drop_function.xml#drop_function"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_glossary.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_glossary.xml b/docs/topics/impala_glossary.xml
new file mode 100644
index 0000000..f713738
--- /dev/null
+++ b/docs/topics/impala_glossary.xml
@@ -0,0 +1,834 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="glossary">
+
+  <title>Impala Glossary</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <section id="glos_ddl">
+
+      <title>DDL</title>
+
+      <p>
+        A category of SQL statements involving administrative or 
metadata-related operations. Many of these
+        statements start with the keywords <codeph>CREATE</codeph>, 
<codeph>ALTER</codeph>, or
+        <codeph>DROP</codeph>. Contrast with DML.
+      </p>
+    </section>
+
+    <section id="glos_dml">
+
+      <title>DML</title>
+
+      <p>
+        A category of SQL statements that modify table data. Impala has a 
limited set of DML statements, primarily
+        the <codeph>INSERT</codeph> statement. Contrast with DDL.
+      </p>
+    </section>
+
+    <section id="glos_catalog">
+
+      <title>catalog</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_catalogd">
+
+      <title>catalogd</title>
+
+      <p>
+        A daemon.
+      </p>
+    </section>
+
+    <section id="glos_impalad">
+
+      <title>impalad</title>
+
+      <p>
+        A daemon.
+      </p>
+    </section>
+
+    <section id="glos_statestored">
+
+      <title>statestored</title>
+
+      <p>
+        A daemon.
+      </p>
+    </section>
+
+    <section id="glos_statestore">
+
+      <title>statestore</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_metastore">
+
+      <title>metastore</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_table">
+
+      <title>table</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_database">
+
+      <title>database</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_hive">
+
+      <title>Hive</title>
+
+      <p>
+        Its full trademarked name is Apache Hive.
+      </p>
+    </section>
+
+    <section id="glos_hbase">
+
+      <title>HBase</title>
+
+      <p>
+        Its full trademarked name is Apache HBase.
+      </p>
+    </section>
+
+    <section id="glos_coordinator_node">
+
+      <title>coordinator node</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_sql">
+
+      <title>SQL</title>
+
+      <p>
+        An acronym for Structured Query Language, the industry standard 
language for database operations.
+        Implemented by components such as Impala and Hive.
+      </p>
+    </section>
+
+    <section id="glos_avro">
+
+      <title>Avro</title>
+
+      <p>
+        A file format.
+      </p>
+    </section>
+
+    <section id="glos_text">
+
+      <title>text</title>
+
+      <p>
+        A file format.
+      </p>
+    </section>
+
+    <section id="glos_hdfs">
+
+      <title>HDFS</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_block">
+
+      <title>block</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_parquet">
+
+      <title>Parquet</title>
+
+      <p>
+        A file format.
+      </p>
+    </section>
+
+    <section id="glos_rcfile">
+
+      <title>RCFile</title>
+
+      <p>
+        A file format.
+      </p>
+    </section>
+
+    <section id="glos_sequencefile">
+
+      <title>SequenceFile</title>
+
+      <p>
+        A file format.
+      </p>
+    </section>
+
+    <section id="glos_snappy">
+
+      <title>Snappy</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_file_format">
+
+      <title>file format</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_compression">
+
+      <title>compression</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_codec">
+
+      <title>codec</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_lzo">
+
+      <title>LZO</title>
+
+      <p>
+        A compression codec.
+      </p>
+    </section>
+
+    <section id="glos_gzip">
+
+      <title>gzip</title>
+
+      <p>
+        A compression codec.
+      </p>
+    </section>
+
+    <section id="glos_bzip2">
+
+      <title>BZip2</title>
+
+      <p>
+        A compression codec.
+      </p>
+    </section>
+
+    <section id="glos_sentry">
+
+      <title>Sentry</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_authorization">
+
+      <title>authorization</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_authentication">
+
+      <title>authentication</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_delegation">
+
+      <title>delegation</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_skew">
+
+      <title>skew</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_profile">
+
+      <title>profile</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_query">
+
+      <title>query</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_external_table">
+
+      <title>external table</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_internal_table">
+
+      <title>internal table</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_view">
+
+      <title>view</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_function">
+
+      <title>function</title>
+
+      <p>
+        In Impala, a kind of database object that performs customized 
processing or business logic. The kinds of
+        functions include built-in functions, UDFs, and UDAs.
+      </p>
+    </section>
+
+    <section id="glos_udf">
+
+      <title>UDF</title>
+
+      <p>
+        Acronym for user-defined function. In Impala, a function that you 
write in C++ or Java. A UDF (as opposed
+        to a UDA) returns a single scalar value for each row evaluated in the 
result set. On occasion, UDF is used
+        in a broad context to include all kinds of user-written functions, 
including UDAs.
+      </p>
+    </section>
+
+    <section id="glos_uda">
+
+      <title>UDA</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_aggregate_function">
+
+      <title>aggregate function</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_built_in_function">
+
+      <title>built-in function</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_analytic_function">
+
+      <title>analytic function</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_admission_control">
+
+      <title>admission control</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_llama">
+
+      <title>Llama</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_impala">
+
+      <title>Impala</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_table_statistics">
+
+      <title>table statistics</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_column_statistics">
+
+      <title>column statistics</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_statistics">
+
+      <title>statistics</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_incremental_statistics">
+
+      <title>incremental statistics</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_hdfs_caching">
+
+      <title>HDFS caching</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_join_query">
+
+      <title>join query</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_straight_join">
+
+      <title>straight join</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_inner_join">
+
+      <title>inner join</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_outer_join">
+
+      <title>outer join</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_left_join">
+
+      <title>left join</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_right_join">
+
+      <title>right join</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_equijoin">
+
+      <title>equijoin</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_antijoin">
+
+      <title>antijoin</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_semijoin">
+
+      <title>semijoin</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_cross_join">
+
+      <title>cross join</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_hint">
+
+      <title>hint</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_distinct">
+
+      <title>distinct</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_impala_shell">
+
+      <title>impala-shell</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_odbc">
+
+      <title>ODBC</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_jdbc">
+
+      <title>JDBC</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_data_types">
+
+      <title>data types</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_performance">
+
+      <title>performance</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_scalability">
+
+      <title>scalability</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_hue">
+
+      <title>Hue</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_partition">
+
+      <title>partition</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_partitioning">
+
+      <title>partitioning</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_partitioned_table">
+
+      <title>partitioned table</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_hdfs_trashcan">
+
+      <title>HDFS trashcan</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_explain_plan">
+
+      <title>EXPLAIN plan</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_summary">
+
+      <title>summary</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_executive_summary">
+
+      <title>executive summary</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_alias">
+
+      <title>alias</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_identifier">
+
+      <title>identifier</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_query_option">
+
+      <title>query option</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_daemon">
+
+      <title>daemon</title>
+
+      <p>
+        The daemons that make up the Impala service are 
<cmdname>impalad</cmdname>, <cmdname>statestored</cmdname>,
+        and <cmdname>catalogd</cmdname>.
+      </p>
+    </section>
+
+    <section id="impala_service">
+
+      <title>Impala service</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_session">
+
+      <title>session</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_schema">
+
+      <title>schema</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_schema_evolution">
+
+      <title>schema evolution</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_literal">
+
+      <title>literal</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_string">
+
+      <title>string</title>
+
+      <p>
+        String values are represented in Impala by the data types 
<codeph>STRING</codeph>,
+        <codeph>VARCHAR</codeph>, and <codeph>CHAR</codeph>.
+      </p>
+    </section>
+
+    <section id="glos_number">
+
+      <title>number</title>
+
+      <p>
+        Numeric values are represented in Impala by the data types 
<codeph>INT</codeph>, <codeph>BIGINT</codeph>,
+        <codeph>SMALLINT</codeph>, <codeph>TINYINT</codeph>, 
<codeph>DECIMAL</codeph>, <codeph>DOUBLE</codeph>, and
+        <codeph>FLOAT</codeph>.
+      </p>
+    </section>
+
+    <section id="glos_date_time">
+
+      <title>date/time</title>
+
+      <p>
+        Date and time values are represented in Impala by the 
<codeph>TIMESTAMP</codeph> data type. Sometimes, for
+        convenience or as part of a partitioning plan, dates are represented 
as formatted <codeph>STRING</codeph>
+        columns or as separate integer columns for year, month, and day.
+      </p>
+    </section>
+
+    <section id="glos_cancel">
+
+      <title>cancel</title>
+
+      <p>
+        Halting an Impala query before it is completely finished.
+      </p>
+    </section>
+
+    <section id="glos_streaming">
+
+      <title>streaming</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_sorting">
+
+      <title>sorting</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_aggregation">
+
+      <title>aggregation</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_fragment">
+
+      <title>fragment</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_source">
+
+      <title>source</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_sink">
+
+      <title>sink</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_thrift">
+
+      <title>Thrift</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_delegation">
+
+      <title>delegation</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_authorization">
+
+      <title>authorization</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_authentication">
+
+      <title>authentication</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_partition_pruning">
+
+      <title>partition pruning</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_heartbeat">
+
+      <title>heartbeat</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_subscriber">
+
+      <title>subscriber</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_topic">
+
+      <title>topic</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_broadcast_join">
+
+      <title>broadcast join</title>
+
+      <p></p>
+    </section>
+
+    <section id="glos_shuffle_join">
+
+      <title>shuffle join</title>
+
+      <p></p>
+    </section>
+
+<!-- Use as a template for new definitions:
+    <section id="glos_">
+      <title></title>
+      <p>
+      </p>
+    </section>
+
+-->
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_grant.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_grant.xml b/docs/topics/impala_grant.xml
new file mode 100644
index 0000000..a14a717
--- /dev/null
+++ b/docs/topics/impala_grant.xml
@@ -0,0 +1,122 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.0.0" id="grant">
+
+  <title>GRANT Statement (<keyword keyref="impala20"/> or higher only)</title>
+  <titlealts audience="PDF"><navtitle>GRANT</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="DDL"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Security"/>
+      <data name="Category" value="Sentry"/>
+      <data name="Category" value="Roles"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+      <!-- Consider whether to go deeper into categories like Security for the 
Sentry-related statements. -->
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p rev="2.0.0">
+      <indexterm audience="Cloudera">GRANT statement</indexterm>
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for 
clarity. -->
+      The <codeph>GRANT</codeph> statement grants roles or privileges on 
specified objects to groups. Only Sentry
+      administrative users can grant roles to a group.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock rev="2.3.0 collevelauth">GRANT ROLE <varname>role_name</varname> TO 
GROUP <varname>group_name</varname>
+
+GRANT <varname>privilege</varname> ON <varname>object_type</varname> 
<varname>object_name</varname>
+   TO [ROLE] <varname>role_name</varname>
+   [WITH GRANT OPTION]
+
+<ph rev="2.3.0">privilege ::= SELECT | SELECT(<varname>column_name</varname>) 
| INSERT | ALL</ph>
+object_type ::= TABLE | DATABASE | SERVER | URI
+</codeblock>
+
+    <p>
+      Typically, the object name is an identifier. For URIs, it is a string 
literal.
+    </p>
+
+<!-- Turn privilege info into a conref or series of conrefs. (In both GRANT 
and REVOKE.) -->
+
+    <p conref="../shared/impala_common.xml#common/privileges_blurb"/>
+
+    <p>
+<!-- To do: The wording here can be fluid, and it's reused in several 
statements. Turn into a conref. -->
+      Only administrative users (initially, a predefined set of users 
specified in the Sentry service configuration
+      file) can use this statement.
+    </p>
+
+    <p>
+      The <codeph>WITH GRANT OPTION</codeph> clause allows members of the 
specified role to issue
+      <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements for those 
same privileges.
+<!-- Copied from Sentry docs. Turn into conref. I did some rewording for 
clarity. -->
+      Hence, if a role has the <codeph>ALL</codeph> privilege on a database 
and the <codeph>WITH GRANT
+      OPTION</codeph> set, users granted that role can execute 
<codeph>GRANT</codeph>/<codeph>REVOKE</codeph>
+      statements only for that database or child tables of the database. This 
means a user could revoke the
+      privileges of the user that provided them the <codeph>GRANT 
OPTION</codeph>.
+    </p>
+
+    <p>
+<!-- Copied from Sentry docs. Turn into conref. Except I changed Hive to 
Impala. -->
+      Impala does not currently support revoking only the <codeph>WITH GRANT 
OPTION</codeph> from a privilege
+      previously granted to a role. To remove the <codeph>WITH GRANT 
OPTION</codeph>, revoke the privilege and
+      grant it again without the <codeph>WITH GRANT OPTION</codeph> flag.
+    </p>
+
+    <p rev="2.3.0 collevelauth">
+      The ability to grant or revoke <codeph>SELECT</codeph> privilege on 
specific columns is available
+      in <keyword keyref="impala23_full"/> and higher. See
+      <xref audience="integrated" 
href="sg_hive_sql.xml#concept_c2q_4qx_p4/col_level_auth_sentry"/><xref 
audience="standalone" 
href="https://www.cloudera.com/documentation/enterprise/latest/topics/sg_hive_sql.html"
 format="html" scope="external"/>
+      for details.
+    </p>
+
+<!-- Turn compatibility info into a conref or series of conrefs. (In both 
GRANT and REVOKE.) -->
+
+<!-- If they diverge during development, consider the version here in GRANT 
the authoritative one. -->
+
+    <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+    <p>
+      <ul>
+        <li>
+          The Impala <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> 
statements are available in CDH 5.2 and
+          later.
+        </li>
+
+        <li>
+          In CDH 5.1 and later, Impala can make use of any roles and 
privileges specified by the
+          <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements in 
Hive, when your system is configured to
+          use the Sentry service instead of the file-based policy mechanism.
+        </li>
+
+        <li>
+          The Impala <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> 
statements for privileges do not require
+          the <codeph>ROLE</codeph> keyword to be repeated before each role 
name, unlike the equivalent Hive
+          statements.
+        </li>
+
+        <li conref="../shared/impala_common.xml#common/grant_revoke_single"/>
+      </ul>
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+    <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_authorization.xml#authorization"/>, <xref 
href="impala_revoke.xml#revoke"/>,
+      <xref href="impala_create_role.xml#create_role"/>, <xref 
href="impala_drop_role.xml#drop_role"/>,
+      <xref href="impala_show.xml#show"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_group_by.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_group_by.xml b/docs/topics/impala_group_by.xml
new file mode 100644
index 0000000..2c6ade3
--- /dev/null
+++ b/docs/topics/impala_group_by.xml
@@ -0,0 +1,139 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="group_by">
+
+  <title>GROUP BY Clause</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Specify the <codeph>GROUP BY</codeph> clause in queries that use 
aggregation functions, such as
+      <codeph><xref href="impala_count.xml#count">COUNT()</xref></codeph>,
+      <codeph><xref href="impala_sum.xml#sum">SUM()</xref></codeph>,
+      <codeph><xref href="impala_avg.xml#avg">AVG()</xref></codeph>,
+      <codeph><xref href="impala_min.xml#min">MIN()</xref></codeph>, and
+      <codeph><xref href="impala_max.xml#max">MAX()</xref></codeph>. Specify 
in the
+      <codeph><xref href="impala_group_by.xml#group_by">GROUP 
BY</xref></codeph> clause the names of all the
+      columns that do not participate in the aggregation operation.
+    </p>
+
+    <!-- Good to show an example of cases where ORDER BY does and doesn't work 
with complex types. -->
+    <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+    <p rev="2.3.0">
+      In <keyword keyref="impala23_full"/> and higher, the complex data types 
<codeph>STRUCT</codeph>,
+      <codeph>ARRAY</codeph>, and <codeph>MAP</codeph> are available. These 
columns cannot
+      be referenced directly in the <codeph>ORDER BY</codeph> clause.
+      When you query a complex type column, you use join notation to 
<q>unpack</q> the elements
+      of the complex type, and within the join query you can include an 
<codeph>ORDER BY</codeph>
+      clause to control the order in the result set of the scalar elements 
from the complex type.
+      See <xref href="impala_complex_types.xml#complex_types"/> for details 
about Impala support for complex types.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/zero_length_strings"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      For example, the following query finds the 5 items that sold the highest 
total quantity (using the
+      <codeph>SUM()</codeph> function), and also counts the number of sales 
transactions for those items (using the
+      <codeph>COUNT()</codeph> function). Because the column representing the 
item IDs is not used in any
+      aggregation functions, we specify that column in the <codeph>GROUP 
BY</codeph> clause.
+    </p>
+
+<codeblock>select
+  <b>ss_item_sk</b> as Item,
+  <b>count</b>(ss_item_sk) as Times_Purchased,
+  <b>sum</b>(ss_quantity) as Total_Quantity_Purchased
+from store_sales
+  <b>group by ss_item_sk</b>
+  order by sum(ss_quantity) desc
+  limit 5;
++-------+-----------------+--------------------------+
+| item  | times_purchased | total_quantity_purchased |
++-------+-----------------+--------------------------+
+| 9325  | 372             | 19072                    |
+| 4279  | 357             | 18501                    |
+| 7507  | 371             | 18475                    |
+| 5953  | 369             | 18451                    |
+| 16753 | 375             | 18446                    |
++-------+-----------------+--------------------------+</codeblock>
+
+    <p>
+      The <codeph>HAVING</codeph> clause lets you filter the results of 
aggregate functions, because you cannot
+      refer to those expressions in the <codeph>WHERE</codeph> clause. For 
example, to find the 5 lowest-selling
+      items that were included in at least 100 sales transactions, we could 
use this query:
+    </p>
+
+<codeblock>select
+  <b>ss_item_sk</b> as Item,
+  <b>count</b>(ss_item_sk) as Times_Purchased,
+  <b>sum</b>(ss_quantity) as Total_Quantity_Purchased
+from store_sales
+  <b>group by ss_item_sk</b>
+  <b>having times_purchased &gt;= 100</b>
+  order by sum(ss_quantity)
+  limit 5;
++-------+-----------------+--------------------------+
+| item  | times_purchased | total_quantity_purchased |
++-------+-----------------+--------------------------+
+| 13943 | 105             | 4087                     |
+| 2992  | 101             | 4176                     |
+| 4773  | 107             | 4204                     |
+| 14350 | 103             | 4260                     |
+| 11956 | 102             | 4275                     |
++-------+-----------------+--------------------------+</codeblock>
+
+    <p>
+      When performing calculations involving scientific or financial data, 
remember that columns with type
+      <codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph> are stored as true 
floating-point numbers, which cannot
+      precisely represent every possible fractional value. Thus, if you 
include a <codeph>FLOAT</codeph> or
+      <codeph>DOUBLE</codeph> column in a <codeph>GROUP BY</codeph> clause, 
the results might not precisely match
+      literal values in your query or from an original Text data file. Use 
rounding operations, the
+      <codeph>BETWEEN</codeph> operator, or another arithmetic technique to 
match floating-point values that are
+      <q>near</q> literal values you expect. For example, this query on the 
<codeph>ss_wholesale_cost</codeph>
+      column returns cost values that are close but not identical to the 
original figures that were entered as
+      decimal fractions.
+    </p>
+
+<codeblock>select ss_wholesale_cost, avg(ss_quantity * ss_sales_price) as 
avg_revenue_per_sale
+  from sales
+  group by ss_wholesale_cost
+  order by avg_revenue_per_sale desc
+  limit 5;
++-------------------+----------------------+
+| ss_wholesale_cost | avg_revenue_per_sale |
++-------------------+----------------------+
+| 96.94000244140625 | 4454.351539300434    |
+| 95.93000030517578 | 4423.119941283189    |
+| 98.37999725341797 | 4332.516490316291    |
+| 97.97000122070312 | 4330.480601655014    |
+| 98.52999877929688 | 4291.316953108634    |
++-------------------+----------------------+</codeblock>
+
+    <p>
+      Notice how wholesale cost values originally entered as decimal fractions 
such as <codeph>96.94</codeph> and
+      <codeph>98.38</codeph> are slightly larger or smaller in the result set, 
due to precision limitations in the
+      hardware floating-point types. The imprecise representation of 
<codeph>FLOAT</codeph> and
+      <codeph>DOUBLE</codeph> values is why financial data processing systems 
often store currency using data types
+      that are less space-efficient but avoid these types of rounding errors.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+    <p>
+      <xref href="impala_select.xml#select"/>,
+      <xref href="impala_aggregate_functions.xml#aggregate_functions"/>
+    </p>
+
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_group_concat.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_group_concat.xml 
b/docs/topics/impala_group_concat.xml
new file mode 100644
index 0000000..0971875
--- /dev/null
+++ b/docs/topics/impala_group_concat.xml
@@ -0,0 +1,135 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.2" id="group_concat">
+
+  <title>GROUP_CONCAT Function</title>
+  <titlealts audience="PDF"><navtitle>GROUP_CONCAT</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">group_concat() function</indexterm>
+      An aggregate function that returns a single string representing the 
argument value concatenated together for
+      each row of the result set. If the optional separator string is 
specified, the separator is added between
+      each pair of concatenated values. The default separator is a comma 
followed by a space.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<!-- Might allow DISTINCT at some point. Check: does it allow ALL now? -->
+
+<codeblock>GROUP_CONCAT([ALL] <varname>expression</varname> [, 
<varname>separator</varname>])</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/concat_blurb"/>
+
+    <p>
+      By default, returns a single string covering the whole result set. To 
include other columns or values in the
+      result set, or to produce multiple concatenated strings for subsets of 
rows, include a <codeph>GROUP
+      BY</codeph> clause in the query.
+    </p>
+
+    <p>
+      <b>Return type:</b> <codeph>STRING</codeph>
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <p>
+      You cannot apply the <codeph>DISTINCT</codeph> operator to the argument 
of this function.
+    </p>
+
+    <p 
conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
+
+    <p>
+      Currently, Impala returns an error if the result value grows larger than 
1 GiB.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      The following examples illustrate various aspects of the 
<codeph>GROUP_CONCAT()</codeph> function.
+    </p>
+
+    <p>
+      You can call the function directly on a <codeph>STRING</codeph> column. 
To use it with a numeric column, cast
+      the value to <codeph>STRING</codeph>.
+    </p>
+
+<codeblock>[localhost:21000] &gt; create table t1 (x int, s string);
+[localhost:21000] &gt; insert into t1 values (1, "one"), (3, "three"), (2, 
"two"), (1, "one");
+[localhost:21000] &gt; select group_concat(s) from t1;
++----------------------+
+| group_concat(s)      |
++----------------------+
+| one, three, two, one |
++----------------------+
+[localhost:21000] &gt; select group_concat(cast(x as string)) from t1;
++---------------------------------+
+| group_concat(cast(x as string)) |
++---------------------------------+
+| 1, 3, 2, 1                      |
++---------------------------------+
+</codeblock>
+
+    <p>
+      The optional separator lets you format the result in flexible ways. The 
separator can be an arbitrary string
+      expression, not just a single character.
+    </p>
+
+<codeblock>[localhost:21000] &gt; select group_concat(s,"|") from t1;
++----------------------+
+| group_concat(s, '|') |
++----------------------+
+| one|three|two|one    |
++----------------------+
+[localhost:21000] &gt; select group_concat(s,'---') from t1;
++-------------------------+
+| group_concat(s, '---')  |
++-------------------------+
+| one---three---two---one |
++-------------------------+
+</codeblock>
+
+    <p>
+      The default separator is a comma followed by a space. To get a 
comma-delimited result without extra spaces,
+      specify a delimiter character that is only a comma.
+    </p>
+
+<codeblock>[localhost:21000] &gt; select group_concat(s,',') from t1;
++----------------------+
+| group_concat(s, ',') |
++----------------------+
+| one,three,two,one    |
++----------------------+
+</codeblock>
+
+    <p>
+      Including a <codeph>GROUP BY</codeph> clause lets you produce a 
different concatenated result for each group
+      in the result set. In this example, the only <codeph>X</codeph> value 
that occurs more than once is
+      <codeph>1</codeph>, so that is the only row in the result set where 
<codeph>GROUP_CONCAT()</codeph> returns a
+      delimited value. For groups containing a single value, 
<codeph>GROUP_CONCAT()</codeph> returns the original
+      value of its <codeph>STRING</codeph> argument.
+    </p>
+
+<codeblock>[localhost:21000] &gt; select x, group_concat(s) from t1 group by x;
++---+-----------------+
+| x | group_concat(s) |
++---+-----------------+
+| 2 | two             |
+| 3 | three           |
+| 1 | one, one        |
++---+-----------------+
+</codeblock>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_hadoop.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_hadoop.xml b/docs/topics/impala_hadoop.xml
new file mode 100644
index 0000000..7941a47
--- /dev/null
+++ b/docs/topics/impala_hadoop.xml
@@ -0,0 +1,165 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="intro_hadoop">
+
+  <title>How Impala Fits Into the Hadoop Ecosystem</title>
+  <titlealts audience="PDF"><navtitle>Role in the Hadoop 
Ecosystem</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Hadoop"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Impala makes use of many familiar components within the Hadoop 
ecosystem. Impala can interchange data with
+      other Hadoop components, as both a consumer and a producer, so it can 
fit in flexible ways into your ETL and
+      ELT pipelines.
+    </p>
+
+    <p outputclass="toc inpage"/>
+  </conbody>
+
+  <concept id="intro_hive">
+
+    <title>How Impala Works with Hive</title>
+
+    <conbody>
+
+      <p>
+        A major Impala goal is to make SQL-on-Hadoop operations fast and 
efficient enough to appeal to new
+        categories of users and open up Hadoop to new types of use cases. 
Where practical, it makes use of existing
+        Apache Hive infrastructure that many Hadoop users already have in 
place to perform long-running,
+        batch-oriented SQL queries.
+      </p>
+
+      <p>
+        In particular, Impala keeps its table definitions in a traditional 
MySQL or PostgreSQL database known as
+        the <b>metastore</b>, the same database where Hive keeps this type of 
data. Thus, Impala can access tables
+        defined or loaded by Hive, as long as all columns use Impala-supported 
data types, file formats, and
+        compression codecs.
+      </p>
+
+      <p>
+        The initial focus on query features and performance means that Impala 
can read more types of data with the
+        <codeph>SELECT</codeph> statement than it can write with the 
<codeph>INSERT</codeph> statement. To query
+        data using the Avro, RCFile, or SequenceFile <xref 
href="impala_file_formats.xml#file_formats">file
+        formats</xref>, you load the data using Hive.
+      </p>
+
+      <p rev="1.2.2">
+        The Impala query optimizer can also make use of <xref 
href="impala_perf_stats.xml#perf_table_stats">table
+        statistics</xref> and <xref 
href="impala_perf_stats.xml#perf_column_stats">column statistics</xref>.
+        Originally, you gathered this information with the <codeph>ANALYZE 
TABLE</codeph> statement in Hive; in
+        Impala 1.2.2 and higher, use the Impala <codeph><xref 
href="impala_compute_stats.xml#compute_stats">COMPUTE
+        STATS</xref></codeph> statement instead. <codeph>COMPUTE 
STATS</codeph> requires less setup, is more
+        reliable, and does not require switching back and forth between 
<cmdname>impala-shell</cmdname>
+        and the Hive shell.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="intro_metastore">
+
+    <title>Overview of Impala Metadata and the Metastore</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Hive"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        As discussed in <xref href="impala_hadoop.xml#intro_hive"/>, Impala 
maintains information about table
+        definitions in a central database known as the <b>metastore</b>. 
Impala also tracks other metadata for the
+        low-level characteristics of data files:
+      </p>
+
+      <ul>
+        <li>
+          The physical locations of blocks within HDFS.
+        </li>
+      </ul>
+
+      <p>
+        For tables with a large volume of data and/or many partitions, 
retrieving all the metadata for a table can
+        be time-consuming, taking minutes in some cases. Thus, each Impala 
node caches all of this metadata to
+        reuse for future queries against the same table.
+      </p>
+
+      <p rev="1.2">
+        If the table definition or the data in the table is updated, all other 
Impala daemons in the cluster must
+        receive the latest metadata, replacing the obsolete cached metadata, 
before issuing a query against that
+        table. In Impala 1.2 and higher, the metadata update is automatic, 
coordinated through the
+        <cmdname>catalogd</cmdname> daemon, for all DDL and DML statements 
issued through Impala. See
+        <xref href="impala_components.xml#intro_catalogd"/> for details.
+      </p>
+
+      <p>
+        For DDL and DML issued through Hive, or changes made manually to files 
in HDFS, you still use the
+        <codeph>REFRESH</codeph> statement (when new data files are added to 
existing tables) or the
+        <codeph>INVALIDATE METADATA</codeph> statement (for entirely new 
tables, or after dropping a table,
+        performing an HDFS rebalance operation, or deleting data files). 
Issuing <codeph>INVALIDATE
+        METADATA</codeph> by itself retrieves metadata for all the tables 
tracked by the metastore. If you know
+        that only specific tables have been changed outside of Impala, you can 
issue <codeph>REFRESH
+        <varname>table_name</varname></codeph> for each affected table to only 
retrieve the latest metadata for
+        those tables.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="intro_hdfs">
+
+    <title>How Impala Uses HDFS</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="HDFS"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        Impala uses the distributed filesystem HDFS as its primary data 
storage medium. Impala relies on the
+        redundancy provided by HDFS to guard against hardware or network 
outages on individual nodes. Impala table
+        data is physically represented as data files in HDFS, using familiar 
HDFS file formats and compression
+        codecs. When data files are present in the directory for a new table, 
Impala reads them all, regardless of
+        file name. New data is added in files with names controlled by Impala.
+      </p>
+    </conbody>
+  </concept>
+
+  <concept id="intro_hbase">
+
+    <title>How Impala Uses HBase</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Concepts"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="HBase"/>
+    </metadata>
+  </prolog>
+
+    <conbody>
+
+      <p>
+        HBase is an alternative to HDFS as a storage medium for Impala data. 
It is a database storage system built
+        on top of HDFS, without built-in SQL support. Many Hadoop users 
already have it configured and store large
+        (often sparse) data sets in it. By defining tables in Impala and 
mapping them to equivalent tables in
+        HBase, you can query the contents of the HBase tables through Impala, 
and even perform join queries
+        including both Impala and HBase tables. See <xref 
href="impala_hbase.xml#impala_hbase"/> for details.
+      </p>
+    </conbody>
+  </concept>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3be0f122/docs/topics/impala_having.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_having.xml b/docs/topics/impala_having.xml
new file mode 100644
index 0000000..2de32bb
--- /dev/null
+++ b/docs/topics/impala_having.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="having">
+
+  <title>HAVING Clause</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Performs a filter operation on a <codeph>SELECT</codeph> query, by 
examining the results of aggregation
+      functions rather than testing each individual table row. Therefore, it 
is always used in conjunction with a
+      function such as <codeph><xref 
href="impala_count.xml#count">COUNT()</xref></codeph>,
+      <codeph><xref href="impala_sum.xml#sum">SUM()</xref></codeph>,
+      <codeph><xref href="impala_avg.xml#avg">AVG()</xref></codeph>,
+      <codeph><xref href="impala_min.xml#min">MIN()</xref></codeph>, or
+      <codeph><xref href="impala_max.xml#max">MAX()</xref></codeph>, and 
typically with the
+      <codeph><xref href="impala_group_by.xml#group_by">GROUP 
BY</xref></codeph> clause also.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <p rev="2.0.0">
+      The filter expression in the <codeph>HAVING</codeph> clause cannot 
include a scalar subquery.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+    <p>
+      <xref href="impala_select.xml#select"/>,
+      <xref href="impala_group_by.xml#group_by"/>,
+      <xref href="impala_aggregate_functions.xml#aggregate_functions"/>
+    </p>
+
+  </conbody>
+</concept>

[27/51] [partial] incubator-impala git commit: IMPALA-3398: Add docs to main Impala branch.

Reply via email to