[01/22] incubator-impala git commit: First try at porting over the source files necessary for the Impala SQL Reference.

jrussell Tue, 26 Jul 2016 16:05:23 -0700

Repository: incubator-impala
Updated Branches:
  refs/heads/doc_prototype 0ad935b63 -> 463ddf924



http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_varchar.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_varchar.xml b/docs/topics/impala_varchar.xml
new file mode 100644
index 0000000..32db4ae
--- /dev/null
+++ b/docs/topics/impala_varchar.xml
@@ -0,0 +1,215 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="varchar" rev="2.0.0">
+
+  <title>VARCHAR Data Type (CDH 5.2 or higher only)</title>
+  <titlealts><navtitle>VARCHAR (CDH 5.2 or higher only)</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">VARCHAR data type</indexterm>
+      A variable-length character type, truncated during processing if 
necessary to fit within the specified
+      length.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+    <p>
+      In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+    </p>
+
+<codeblock><varname>column_name</varname> 
VARCHAR(<varname>max_length</varname>)</codeblock>
+
+    <p>
+      The maximum length you can specify is 65,535.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/partitioning_bad"/>
+
+<!--
+<p>
+This type can be used for partition key columns.
+Because of the efficiency advantage of numeric values over character-based 
values,
+if the partition key is a string representation of a number,
+prefer to use an integer data type with sufficient range (<codeph>INT</codeph>,
+<codeph>BIGINT</codeph>, and so on) rather than this type.
+</p>
+-->
+
+    <p conref="../shared/impala_common.xml#common/hbase_no"/>
+
+    <p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+
+    <ul>
+      <li>
+        This type can be read from and written to Parquet files.
+      </li>
+
+      <li>
+        There is no requirement for a particular level of Parquet.
+      </li>
+
+      <li>
+        Parquet files generated by Impala and containing this type can be 
freely interchanged with other components
+        such as Hive and MapReduce.
+      </li>
+
+      <li>
+        Parquet data files can contain values that are longer than allowed by 
the
+        <codeph>VARCHAR(<varname>n</varname>)</codeph> length limit. Impala 
ignores any extra trailing characters
+        when it processes those values during a query.
+      </li>
+    </ul>
+
+    <p conref="../shared/impala_common.xml#common/text_blurb"/>
+
+    <p>
+      Text data files can contain values that are longer than allowed by the
+      <codeph>VARCHAR(<varname>n</varname>)</codeph> length limit. Any extra 
trailing characters are ignored when
+      Impala processes those values during a query.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/schema_evolution_blurb"/>
+
+    <p>
+      You can use <codeph>ALTER TABLE ... CHANGE</codeph> to switch column 
data types to and from
+      <codeph>VARCHAR</codeph>. You can convert from <codeph>STRING</codeph> to
+      <codeph>VARCHAR(<varname>n</varname>)</codeph>, or from 
<codeph>VARCHAR(<varname>n</varname>)</codeph> to
+      <codeph>STRING</codeph>, or from 
<codeph>CHAR(<varname>n</varname>)</codeph> to
+      <codeph>VARCHAR(<varname>n</varname>)</codeph>, or from 
<codeph>VARCHAR(<varname>n</varname>)</codeph> to
+      <codeph>CHAR(<varname>n</varname>)</codeph>. When switching back and 
forth between <codeph>VARCHAR</codeph>
+      and <codeph>CHAR</codeph>, you can also change the length value. This 
schema evolution works the same for
+      tables using any file format. If a table contains values longer than the 
maximum length defined for a
+      <codeph>VARCHAR</codeph> column, Impala does not return an error. Any 
extra trailing characters are ignored
+      when Impala processes those values during a query.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+    <p>
+      This type is available using Impala 2.0 or higher under CDH 4, or with 
Impala on CDH 5.2 or higher. There are
+      no compatibility issues with other components when exchanging data files 
or running Impala on CDH 4.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/internals_min_bytes"/>
+
+    <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+    <p conref="../shared/impala_common.xml#common/column_stats_variable"/>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/blobs_are_strings"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      The following examples show how long and short <codeph>VARCHAR</codeph> 
values are treated. Values longer
+      than the maximum specified length are truncated by 
<codeph>CAST()</codeph>, or when queried from existing
+      data files. Values shorter than the maximum specified length are 
represented as the actual length of the
+      value, with no extra padding as seen with <codeph>CHAR</codeph> values.
+    </p>
+
+<codeblock>create table varchar_1 (s varchar(1));
+create table varchar_4 (s varchar(4));
+create table varchar_20 (s varchar(20));
+
+insert into varchar_1 values (cast('a' as varchar(1))), (cast('b' as 
varchar(1))), (cast('hello' as varchar(1))), (cast('world' as varchar(1)));
+insert into varchar_4 values (cast('a' as varchar(4))), (cast('b' as 
varchar(4))), (cast('hello' as varchar(4))), (cast('world' as varchar(4)));
+insert into varchar_20 values (cast('a' as varchar(20))), (cast('b' as 
varchar(20))), (cast('hello' as varchar(20))), (cast('world' as varchar(20)));
+
+select * from varchar_1;
++---+
+| s |
++---+
+| a |
+| b |
+| h |
+| w |
++---+
+select * from varchar_4;
++------+
+| s    |
++------+
+| a    |
+| b    |
+| hell |
+| worl |
++------+
+[localhost:21000] &gt; select * from varchar_20;
++-------+
+| s     |
++-------+
+| a     |
+| b     |
+| hello |
+| world |
++-------+
+select concat('[',s,']') as s from varchar_20;
++---------+
+| s       |
++---------+
+| [a]     |
+| [b]     |
+| [hello] |
+| [world] |
++---------+
+</codeblock>
+
+    <p>
+      The following example shows how identical <codeph>VARCHAR</codeph> 
values compare as equal, even if the
+      columns are defined with different maximum lengths. Both tables contain 
<codeph>'a'</codeph> and
+      <codeph>'b'</codeph> values. The longer <codeph>'hello'</codeph> and 
<codeph>'world'</codeph> values from the
+      <codeph>VARCHAR_20</codeph> table were truncated when inserted into the 
<codeph>VARCHAR_1</codeph> table.
+    </p>
+
+<codeblock>select s from varchar_1 join varchar_20 using (s);
++-------+
+| s     |
++-------+
+| a     |
+| b     |
++-------+
+</codeblock>
+
+    <p>
+      The following examples show how <codeph>VARCHAR</codeph> values are 
freely interchangeable with
+      <codeph>STRING</codeph> values in contexts such as comparison operators 
and built-in functions:
+    </p>
+
+<codeblock>select length(cast('foo' as varchar(100))) as length;
++--------+
+| length |
++--------+
+| 3      |
++--------+
+select cast('xyz' as varchar(5)) &gt; cast('abc' as varchar(10)) as greater;
++---------+
+| greater |
++---------+
+| true    |
++---------+
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/udf_blurb_no"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_string.xml#string"/>, <xref 
href="impala_char.xml#char"/>,
+      <xref href="impala_literals.xml#string_literals"/>,
+      <xref href="impala_string_functions.xml#string_functions"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_variance.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_variance.xml b/docs/topics/impala_variance.xml
new file mode 100644
index 0000000..e0c5d02
--- /dev/null
+++ b/docs/topics/impala_variance.xml
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.4" id="variance">
+
+  <title>VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, VAR_POP 
Functions</title>
+  <titlealts><navtitle>VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, 
VAR_POP</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Querying"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">variance() function</indexterm>
+      <indexterm audience="Cloudera">variance_samp() function</indexterm>
+      <indexterm audience="Cloudera">variance_pop() function</indexterm>
+      <indexterm audience="Cloudera">var_samp() function</indexterm>
+      <indexterm audience="Cloudera">var_pop() function</indexterm>
+      An aggregate function that returns the
+      <xref href="http://en.wikipedia.org/wiki/Variance"; scope="external" 
format="html">variance</xref> of a set of
+      numbers. This is a mathematical property that signifies how far the 
values spread apart from the mean. The
+      return value can be zero (if the input is a single value, or a set of 
identical values), or a positive number
+      otherwise.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>{ VARIANCE | VAR[IANCE]_SAMP | VAR[IANCE]_POP } ([DISTINCT | ALL] 
<varname>expression</varname>)</codeblock>
+
+    <p>
+      This function works with any numeric data type.
+    </p>
+
+    <p 
conref="../shared/impala_common.xml#common/former_odd_return_type_string"/>
+
+    <p>
+      This function is typically used in mathematical formulas related to 
probability distributions.
+    </p>
+
+    <p>
+      The <codeph>VARIANCE_SAMP()</codeph> and <codeph>VARIANCE_POP()</codeph> 
functions compute the sample
+      variance and population variance, respectively, of the input values. 
(<codeph>VARIANCE()</codeph> is an alias
+      for <codeph>VARIANCE_SAMP()</codeph>.) Both functions evaluate all input 
rows matched by the query. The
+      difference is that <codeph>STDDEV_SAMP()</codeph> is scaled by 
<codeph>1/(N-1)</codeph> while
+      <codeph>STDDEV_POP()</codeph> is scaled by <codeph>1/N</codeph>.
+    </p>
+
+    <p rev="2.0.0">
+      The functions <codeph>VAR_SAMP()</codeph> and <codeph>VAR_POP()</codeph> 
are the same as
+      <codeph>VARIANCE_SAMP()</codeph> and <codeph>VARIANCE_POP()</codeph>, 
respectively. These aliases are
+      available in Impala 2.0 and later.
+    </p>
+
+    <p>
+      If no input rows match the query, the result of any of these functions 
is <codeph>NULL</codeph>. If a single
+      input row matches the query, the result of any of these functions is 
<codeph>"0.0"</codeph>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      This example demonstrates how <codeph>VARIANCE()</codeph> and 
<codeph>VARIANCE_SAMP()</codeph> return the
+      same result, while <codeph>VARIANCE_POP()</codeph> uses a slightly 
different calculation to reflect that the
+      input data is considered part of a larger <q>population</q>.
+    </p>
+
+<codeblock>[localhost:21000] &gt; select variance(score) from test_scores;
++-----------------+
+| variance(score) |
++-----------------+
+| 812.25          |
++-----------------+
+[localhost:21000] &gt; select variance_samp(score) from test_scores;
++----------------------+
+| variance_samp(score) |
++----------------------+
+| 812.25               |
++----------------------+
+[localhost:21000] &gt; select variance_pop(score) from test_scores;
++---------------------+
+| variance_pop(score) |
++---------------------+
+| 811.438             |
++---------------------+
+</codeblock>
+
+    <p>
+      This example demonstrates that, because the return value of these 
aggregate functions is a
+      <codeph>STRING</codeph>, you convert the result with 
<codeph>CAST</codeph> if you need to do further
+      calculations as a numeric value.
+    </p>
+
+<codeblock>[localhost:21000] &gt; create table score_stats as select 
cast(stddev(score) as decimal(7,4)) `standard_deviation`, cast(variance(score) 
as decimal(7,4)) `variance` from test_scores;
++-------------------+
+| summary           |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+[localhost:21000] &gt; desc score_stats;
++--------------------+--------------+---------+
+| name               | type         | comment |
++--------------------+--------------+---------+
+| standard_deviation | decimal(7,4) |         |
+| variance           | decimal(7,4) |         |
++--------------------+--------------+---------+
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <p 
conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      The <codeph>STDDEV()</codeph>, <codeph>STDDEV_POP()</codeph>, and 
<codeph>STDDEV_SAMP()</codeph> functions
+      compute the standard deviation (square root of the variance) based on 
the results of
+      <codeph>VARIANCE()</codeph>, <codeph>VARIANCE_POP()</codeph>, and 
<codeph>VARIANCE_SAMP()</codeph>
+      respectively. See <xref href="impala_stddev.xml#stddev"/> for details 
about the standard deviation property.
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_views.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_views.xml b/docs/topics/impala_views.xml
new file mode 100644
index 0000000..a6c1a41
--- /dev/null
+++ b/docs/topics/impala_views.xml
@@ -0,0 +1,185 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="views">
+
+  <title>Overview of Impala Views</title>
+  <titlealts><navtitle>Views</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Tables"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Views are lightweight logical constructs that act as aliases for 
queries. You can specify a view name in a
+      query (a <codeph>SELECT</codeph> statement or the 
<codeph>SELECT</codeph> portion of an
+      <codeph>INSERT</codeph> statement) where you would usually specify a 
table name.
+    </p>
+
+    <p>
+      A view lets you:
+    </p>
+
+    <ul>
+      <li>
+        Issue complicated queries with compact and simple syntax:
+<codeblock>-- Take a complicated reporting query, plug it into a CREATE VIEW 
statement...
+create view v1 as select c1, c2, avg(c3) from t1 group by c3 order by c1 desc 
limit 10;
+-- ... and now you can produce the report with 1 line of code.
+select * from v1;</codeblock>
+      </li>
+
+      <li>
+        Reduce maintenance, by avoiding the duplication of complicated queries 
across multiple applications in
+        multiple languages:
+<codeblock>create view v2 as select t1.c1, t1.c2, t2.c3 from t1 join t2 on 
(t1.id = t2.id);
+-- This simple query is safer to embed in reporting applications than the 
longer query above.
+-- The view definition can remain stable even if the structure of the 
underlying tables changes.
+select c1, c2, c3 from v2;</codeblock>
+      </li>
+
+      <li>
+        Build a new, more refined query on top of the original query by adding 
new clauses, select-list
+        expressions, function calls, and so on:
+<codeblock>create view average_price_by_category as select category, 
avg(price) as avg_price from products group by category;
+create view expensive_categories as select category, avg_price from 
average_price_by_category order by avg_price desc limit 10000;
+create view top_10_expensive_categories as select category, avg_price from 
expensive_categories limit 10;</codeblock>
+        This technique lets you build up several more or less granular 
variations of the same query, and switch
+        between them when appropriate.
+<!-- My original assumption was confirmed correct by Alex: outer ORDER BY not 
actually needed.
+In this case, we put an <codeph>ORDER BY</codeph> clause on the <q>top 10</q> 
view, even though there was already an <codeph>ORDER BY</codeph>
+on the <q>top 10000</q> view, because when a query is executed in parallel and 
distributed among multiple nodes, the ordering is only
+guaranteed if there is an <codeph>ORDER BY</codeph> clause at the outermost 
level.
+-->
+      </li>
+
+      <li>
+        Set up aliases with intuitive names for tables, columns, result sets 
from joins, and so on:
+<codeblock>-- The original tables might have cryptic names inherited from a 
legacy system.
+create view action_items as select rrptsk as assignee, treq as due_date, dmisc 
as notes from vxy_t1_br;
+-- You can leave original names for compatibility, build new applications 
using more intuitive ones.
+select assignee, due_date, notes from action_items;</codeblock>
+      </li>
+
+      <li>
+        Swap tables with others that use different file formats, partitioning 
schemes, and so on without any
+        downtime for data copying or conversion:
+<codeblock>create table slow (x int, s string) stored as textfile;
+create view report as select s from slow where x between 20 and 30;
+-- Query is kind of slow due to inefficient table definition, but it works.
+select * from report;
+
+create table fast (s string) partitioned by (x int) stored as parquet;
+-- ...Copy data from SLOW to FAST. Queries against REPORT view continue to 
work...
+
+-- After changing the view definition, queries will be faster due to 
partitioning,
+-- binary format, and compression in the new table.
+alter view report as select s from fast where x between 20 and 30;
+select * from report;</codeblock>
+      </li>
+
+      <li>
+        Avoid coding lengthy subqueries and repeating the same subquery text 
in many other queries.
+      </li>
+
+      <li rev="2.3.0 collevelauth">
+        Set up fine-grained security where a user can query some columns from 
a table but not other columns.
+        Because CDH 5.5 / Impala 2.3 and higher support column-level 
authorization, this technique is no longer
+        required. If you formerly implemented column-level security through 
views, see
+        <xref 
href="sg_hive_sql.xml#concept_c2q_4qx_p4/col_level_auth_sentry"/> for details 
about the
+        column-level authorization feature.
+        <!-- See <xref 
href="impala_authorization.xml#security_examples/sec_ex_views"/> for details. 
-->
+      </li>
+    </ul>
+
+    <p>
+      The SQL statements that configure views are <xref 
href="impala_create_view.xml#create_view"/>,
+      <xref href="impala_alter_view.xml#alter_view"/>, and <xref 
href="impala_drop_view.xml#drop_view"/>. You can
+      specify view names when querying data (<xref 
href="impala_select.xml#select"/>) and copying data from one
+      table to another (<xref href="impala_insert.xml#insert"/>). The <xref 
href="impala_with.xml#with">WITH</xref>
+      clause creates an inline view, that only exists for the duration of a 
single query.
+    </p>
+
+<codeblock>[localhost:21000] &gt; create view trivial as select * from 
customer;
+[localhost:21000] &gt; create view some_columns as select c_first_name, 
c_last_name, c_login from customer;
+[localhost:21000] &gt; select * from some_columns limit 5;
+Query finished, fetching results ...
++--------------+-------------+---------+
+| c_first_name | c_last_name | c_login |
++--------------+-------------+---------+
+| Javier       | Lewis       |         |
+| Amy          | Moses       |         |
+| Latisha      | Hamilton    |         |
+| Michael      | White       |         |
+| Robert       | Moran       |         |
++--------------+-------------+---------+
+[localhost:21000] &gt; create view ordered_results as select * from 
some_columns order by c_last_name desc, c_first_name desc limit 1000;
+[localhost:21000] &gt; select * from ordered_results limit 5;
+Query: select * from ordered_results limit 5
+Query finished, fetching results ...
++--------------+-------------+---------+
+| c_first_name | c_last_name | c_login |
++--------------+-------------+---------+
+| Thomas       | Zuniga      |         |
+| Sarah        | Zuniga      |         |
+| Norma        | Zuniga      |         |
+| Lloyd        | Zuniga      |         |
+| Lisa         | Zuniga      |         |
++--------------+-------------+---------+
+Returned 5 row(s) in 0.48s</codeblock>
+
+    <p>
+      The previous example uses descending order for 
<codeph>ORDERED_RESULTS</codeph> because in the sample TPCD-H
+      data, there are some rows with empty strings for both 
<codeph>C_FIRST_NAME</codeph> and
+      <codeph>C_LAST_NAME</codeph>, making the lowest-ordered names unuseful 
in a sample query.
+    </p>
+
+<codeblock>create view visitors_by_day as select day, count(distinct visitors) 
as howmany from web_traffic group by day;
+create view top_10_days as select day, howmany from visitors_by_day order by 
howmany limit 10;
+select * from top_10_days;</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/describe_formatted_view"/>
+
+    <p conref="../shared/impala_common.xml#common/create_table_like_view"/>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_views"/>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <ul>
+      <li>
+        <p>
+          You cannot insert into an Impala view. (In some database systems, 
this operation is allowed and inserts
+          rows into the base table.) You can use a view name on the right-hand 
side of an <codeph>INSERT</codeph>
+          statement, in the <codeph>SELECT</codeph> part.
+        </p>
+      </li>
+
+      <li>
+<!-- This same text is conref'ed in the #views and the #partition_pruning 
topics. -->
+        <p conref="../shared/impala_common.xml#common/partitions_and_views"/>
+      </li>
+
+      <li rev="1.4.0">
+        <p 
conref="../shared/impala_common.xml#common/order_by_view_restriction"/>
+      </li>
+    </ul>
+
+    <p>
+      <b>Related statements:</b> <xref 
href="impala_create_view.xml#create_view"/>,
+      <xref href="impala_alter_view.xml#alter_view"/>, <xref 
href="impala_drop_view.xml#drop_view"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_with.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_with.xml b/docs/topics/impala_with.xml
new file mode 100644
index 0000000..8d1001c
--- /dev/null
+++ b/docs/topics/impala_with.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="with">
+
+  <title>WITH Clause</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Querying"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      A clause that can be added before a <codeph>SELECT</codeph> statement, 
to define aliases for complicated
+      expressions that are referenced multiple times within the body of the 
<codeph>SELECT</codeph>. Similar to
+      <codeph>CREATE VIEW</codeph>, except that the table and column names 
defined in the <codeph>WITH</codeph>
+      clause do not persist after the query finishes, and do not conflict with 
names used in actual tables or
+      views. Also known as <q>subquery factoring</q>.
+    </p>
+
+    <p>
+      You can rewrite a query using subqueries to work the same as with the 
<codeph>WITH</codeph> clause. The
+      purposes of the <codeph>WITH</codeph> clause are:
+    </p>
+
+    <ul>
+      <li>
+        Convenience and ease of maintenance from less repetition with the body 
of the query. Typically used with
+        queries involving <codeph>UNION</codeph>, joins, or aggregation 
functions where the similar complicated
+        expressions are referenced multiple times.
+      </li>
+
+      <li>
+        SQL code that is easier to read and understand by abstracting the most 
complex part of the query into a
+        separate block.
+      </li>
+
+      <li>
+        Improved compatibility with SQL from other database systems that 
support the same clause (primarily Oracle
+        Database).
+        <note>
+          <p>
+            The Impala <codeph>WITH</codeph> clause does not support recursive 
queries in the
+            <codeph>WITH</codeph>, which is supported in some other database 
systems.
+          </p>
+        </note>
+      </li>
+    </ul>
+
+    <p conref="../shared/impala_common.xml#common/sql1999"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- Define 2 subqueries that can be referenced from the body of a 
longer query.
+with t1 as (select 1), t2 as (select 2) insert into tab select * from t1 union 
all select * from t2;
+
+-- Define one subquery at the outer level, and another at the inner level as 
part of the
+-- initial stage of the UNION ALL query.
+with t1 as (select 1) (with t2 as (select 2) select * from t2) union all 
select * from t1;</codeblock>
+  </conbody>
+</concept>

[01/22] incubator-impala git commit: First try at porting over the source files necessary for the Impala SQL Reference.

Reply via email to