[1/7] incubator-impala git commit: Updates to DML statements for Impala + Kudu

tarmstrong Mon, 30 Jan 2017 09:01:01 -0800

Repository: incubator-impala
Updated Branches:
  refs/heads/master f590bc0da -> dde559b31



Updates to DML statements for Impala + Kudu

Fill in syntax, usage notes, examples for
UPDATE, DELETE, UPSERT. Take out IGNORE from
INSERT.

Add 2nd syntax form and examples for DELETE.

Add join syntax to UPDATE.

Change-Id: I60512b7957fb53d86d3123a4f1d46fbb355f4665
Reviewed-on: http://gerrit.cloudera.org:8080/5646
Reviewed-by: Matthew Jacobs <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/fc721fbd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/fc721fbd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/fc721fbd

Branch: refs/heads/master
Commit: fc721fbd0691e03cee3f2e77dadd71da263a6d72
Parents: f590bc0
Author: John Russell <[email protected]>
Authored: Mon Jan 9 14:03:07 2017 -0800
Committer: Impala Public Jenkins <[email protected]>
Committed: Fri Jan 27 21:12:56 2017 +0000

----------------------------------------------------------------------
 docs/shared/impala_common.xml |  32 ++++++++-
 docs/topics/impala_delete.xml | 136 +++++++++++++++++++++++++++++++++----
 docs/topics/impala_insert.xml |  39 ++++++++---
 docs/topics/impala_update.xml | 130 ++++++++++++++++++++++++++++++-----
 docs/topics/impala_upsert.xml |  42 +++++++++---
 5 files changed, 330 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc721fbd/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index 8990dbb..7b3e697 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -2720,6 +2720,10 @@ select max(height), avg(height) from census_data where 
age &gt; 20;
         <xref href="../topics/impala_sync_ddl.xml#sync_ddl">SYNC_DDL</xref> 
query option)
       </p>
 
+      <p id="dml_blurb_kudu" rev="kudu">
+        <b>Statement type:</b> DML
+      </p>
+
       <p rev="1.2" id="sync_ddl_blurb">
         If you connect to different Impala nodes within an 
<cmdname>impala-shell</cmdname> session for
         load-balancing purposes, you can enable the <codeph>SYNC_DDL</codeph> 
query option to make each DDL
@@ -3691,14 +3695,38 @@ sudo pip-python install ssl</codeblock>
         around sharing content between the Impala documentation and the Kudu 
documentation.
       </p>
 
-      <p id="kudu_blurb">
+      <p id="kudu_blurb" rev="kudu 2.8.0">
         <b>Kudu considerations:</b>
       </p>
 
-      <p id="kudu_no_load_data">
+      <p id="kudu_no_load_data" rev="kudu">
         The <codeph>LOAD DATA</codeph> statement cannot be used with Kudu 
tables.
       </p>
 
+      <p id="kudu_no_truncate_table" rev="kudu">
+        Currently, the <codeph>TRUNCATE TABLE</codeph> statement cannot be 
used with Kudu tables.
+      </p>
+
+      <p id="kudu_no_insert_overwrite" rev="kudu">
+        Currently, the <codeph>INSERT OVERWRITE</codeph> syntax cannot be used 
with Kudu tables.
+      </p>
+
+      <p id="kudu_unsupported_data_type" rev="kudu">
+        Currently, the data types <codeph>DECIMAL</codeph>, 
<codeph>TIMESTAMP</codeph>, <codeph>CHAR</codeph>, <codeph>VARCHAR</codeph>,
+        <codeph>ARRAY</codeph>, <codeph>MAP</codeph>, and 
<codeph>STRUCT</codeph> cannot be used with Kudu tables.
+      </p>
+
+      <p id="kudu_non_pk_data_type" rev="kudu">
+        Currently, the data types <codeph>BOOLEAN</codeph>, 
<codeph>FLOAT</codeph>,
+        and <codeph>DOUBLE</codeph> cannot be used for primary key columns in 
Kudu tables.
+      </p>
+
+      <p id="pk_implies_not_null" rev="kudu">
+        Because all of the primary key columns must have non-null values, 
specifying a column
+        in the <codeph>PRIMARY KEY</codeph> clause implicitly adds the 
<codeph>NOT
+        NULL</codeph> attribute to that column.
+      </p>
+
     </section>
 
   </conbody>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc721fbd/docs/topics/impala_delete.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_delete.xml b/docs/topics/impala_delete.xml
index af20d19..c8591b6 100644
--- a/docs/topics/impala_delete.xml
+++ b/docs/topics/impala_delete.xml
@@ -18,7 +18,7 @@ specific language governing permissions and limitations
 under the License.
 -->
 <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept id="delete">
+<concept id="delete" rev="kudu">
 
   <title>DELETE Statement (<keyword keyref="impala28"/> or higher only)</title>
   <titlealts audience="PDF"><navtitle>DELETE</navtitle></titlealts>
@@ -39,43 +39,149 @@ under the License.
 
     <p>
       <indexterm audience="hidden">DELETE statement</indexterm>
-      Deletes one or more rows from a Kudu table.
-      Although deleting a single row or a range of rows would be inefficient 
for tables using HDFS
-      data files, Kudu is able to perform this operation efficiently. 
Therefore, this statement
-      only works for Impala tables that use the Kudu storage engine.
+      Deletes an arbitrary number of rows from a Kudu table.
+      This statement only works for Impala tables that use the Kudu storage 
engine.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
 <codeblock>
+DELETE [FROM] [<varname>database_name</varname>.]<varname>table_name</varname> 
[ WHERE <varname>where_conditions</varname> ]
+
+DELETE <varname>table_ref</varname> FROM 
[<varname>joined_table_refs</varname>] [ WHERE 
<varname>where_conditions</varname> ]
 </codeblock>
 
-    <p rev="kudu">
-      Normally, a <codeph>DELETE</codeph> operation for a Kudu table fails if
-      some partition key columns are not found, due to their being deleted or 
changed
-      by a concurrent <codeph>UPDATE</codeph> or <codeph>DELETE</codeph> 
operation.
-      Specify <codeph>DELETE IGNORE 
<varname>rest_of_statement</varname></codeph> to
-      make the <codeph>DELETE</codeph> continue in this case. The rows with 
the nonexistent
-      duplicate partition key column values are not removed.
+    <p>
+      The first form evaluates rows from one table against an optional
+      <codeph>WHERE</codeph> clause, and deletes all the rows that
+      match the <codeph>WHERE</codeph> conditions, or all rows if
+      <codeph>WHERE</codeph> is omitted.
     </p>
 
-    <p conref="../shared/impala_common.xml#common/dml_blurb"/>
+    <p>
+      The second form evaluates one or more join clauses, and deletes
+      all matching rows from one of the tables. The join clauses can
+      include non-Kudu tables, but the table from which the rows
+      are deleted must be a Kudu table. The <codeph>FROM</codeph>
+      keyword is required in this case, to separate the name of
+      the table whose rows are being deleted from the table names
+      of the join clauses.
+    </p>
 
     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
 
-    <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+    <p>
+      The conditions in the <codeph>WHERE</codeph> clause are the same ones 
allowed
+      for the <codeph>SELECT</codeph> statement. See <xref 
href="impala_select.xml#select"/>
+      for details.
+    </p>
+
+    <p>
+      The conditions in the <codeph>WHERE</codeph> clause can refer to
+      any combination of primary key columns or other columns. Referring to
+      primary key columns in the <codeph>WHERE</codeph> clause is more 
efficient
+      than referring to non-primary key columns.
+    </p>
+
+    <p>
+      If the <codeph>WHERE</codeph> clause is omitted, all rows are removed 
from the table.
+    </p>
+
+    <p>
+      Because Kudu currently does not enforce strong consistency during 
concurrent DML operations,
+      be aware that the results after this statement finishes might be 
different than you
+      intuitively expect:
+    </p>
+    <ul>
+      <li>
+        <p>
+          If some rows cannot be deleted because their
+          some primary key columns are not found, due to their being deleted
+          by a concurrent <codeph>DELETE</codeph> operation,
+          the statement succeeds but returns a warning.
+        </p>
+      </li>
+      <li>
+        <p>
+          A <codeph>DELETE</codeph> statement might also overlap with
+          <codeph>INSERT</codeph>, <codeph>UPDATE</codeph>,
+          or <codeph>UPSERT</codeph> statements running concurrently on the 
same table.
+          After the statement finishes, there might be more or fewer rows than 
expected in the table
+          because it is undefined whether the <codeph>DELETE</codeph> applies 
to rows that are
+          inserted or updated while the <codeph>DELETE</codeph> is in progress.
+        </p>
+      </li>
+    </ul>
+
+    <p>
+      The number of affected rows is reported in an 
<cmdname>impala-shell</cmdname> message
+      and in the query profile.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/dml_blurb_kudu"/>
 
     <note conref="../shared/impala_common.xml#common/compute_stats_next"/>
 
     <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      The following examples show how to delete rows from a specified
+      table, either all rows or rows that match a <codeph>WHERE</codeph>
+      clause:
+    </p>
+
 <codeblock>
+-- Deletes all rows. The FROM keyword is optional.
+DELETE FROM kudu_table;
+DELETE kudu_table;
+
+-- Deletes 0, 1, or more rows.
+-- (If c1 is a single-column primary key, the statement could only
+-- delete 0 or 1 rows.)
+DELETE FROM kudu_table WHERE c1 = 100;
+
+-- Deletes all rows that match all the WHERE conditions.
+DELETE FROM kudu_table WHERE
+  (c1 > c2 OR c3 IN ('hello','world')) AND c4 IS NOT NULL;
+DELETE FROM t1 WHERE
+  (c1 IN (1,2,3) AND c2 > c3) OR c4 IS NOT NULL;
+DELETE FROM time_series WHERE
+  year = 2016 AND month IN (11,12) AND day > 15;
+
+-- WHERE condition with a subquery.
+DELETE FROM t1 WHERE
+  c5 IN (SELECT DISTINCT other_col FROM other_table);
+
+-- Does not delete any rows, because the WHERE condition is always false.
+DELETE FROM kudu_table WHERE 1 = 0;
+</codeblock>
+
+    <p>
+      The following examples show how to delete rows that are part
+      of the result set from a join:
+    </p>
+
+<codeblock>
+-- Remove _all_ rows from t1 that have a matching X value in t2.
+DELETE t1 FROM t1 JOIN t2 ON t1.x = t2.x;
+
+-- Remove _some_ rows from t1 that have a matching X value in t2.
+DELETE t1 FROM t1 JOIN t2 ON t1.x = t2.x
+  WHERE t1.y = FALSE and t2.z > 100;
+
+-- Delete from a Kudu table based on a join with a non-Kudu table.
+DELETE t1 FROM kudu_table t1 JOIN non_kudu_table t2 ON t1.x = t2.x;
 
+-- The tables can be joined in any order as long as the Kudu table
+-- is specified as the deletion target.
+DELETE t2 FROM non_kudu_table t1 JOIN kudu_table t2 ON t1.x = t2.x;
 </codeblock>
 
     <p conref="../shared/impala_common.xml#common/related_info"/>
 
     <p>
-      <xref href="impala_kudu.xml#impala_kudu"/>
+      <xref href="impala_kudu.xml#impala_kudu"/>, <xref 
href="impala_insert.xml#insert"/>,
+      <xref href="impala_update.xml#update"/>, <xref 
href="impala_upsert.xml#upsert"/>
     </p>
 
   </conbody>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc721fbd/docs/topics/impala_insert.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_insert.xml b/docs/topics/impala_insert.xml
index ed9f78f..7e7b76b 100644
--- a/docs/topics/impala_insert.xml
+++ b/docs/topics/impala_insert.xml
@@ -33,7 +33,7 @@ under the License.
       <data name="Category" value="Developers"/>
       <data name="Category" value="Tables"/>
       <data name="Category" value="S3"/>
-      <!-- <data name="Category" value="Kudu"/> -->
+      <data name="Category" value="Kudu"/>
       <!-- This is such an important statement, think if there are more 
applicable categories. -->
     </metadata>
   </prolog>
@@ -90,16 +90,39 @@ hint_clause ::= [SHUFFLE] | [NOSHUFFLE]    (Note: the 
square brackets are part o
       See <xref href="impala_complex_types.xml#complex_types"/> for details 
about working with complex types.
     </p>
 
+    <p conref="../shared/impala_common.xml#common/kudu_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/kudu_no_insert_overwrite"/>
+
     <p rev="kudu">
-      <b>Ignoring duplicate partition keys for Kudu tables (IGNORE clause)</b>
+      Kudu tables require a unique primary key for each row. If an 
<codeph>INSERT</codeph>
+      statement attempts to insert a row with the same values for the primary 
key columns
+      as an existing row, that row is discarded and the insert operation 
continues.
+      When rows are discarded due to duplicate primary keys, the statement 
finishes
+      with a warning, not an error. (This is a change from early releases of 
Kudu
+      where the default was to return in error in such cases, and the syntax
+      <codeph>INSERT IGNORE</codeph> was required to make the statement 
succeed.
+      The <codeph>IGNORE</codeph> clause is no longer part of the 
<codeph>INSERT</codeph>
+      syntax.)
     </p>
 
-    <p rev="kudu">
-      Normally, an <codeph>INSERT</codeph> operation into a Kudu table fails if
-      it would result in duplicate partition key columns for any rows.
-      Specify <codeph>INSERT IGNORE 
<varname>rest_of_statement</varname></codeph> to
-      make the <codeph>INSERT</codeph> continue in this case. The rows that 
would
-      have duplicate partition key columns are not inserted.
+    <p>
+      For situations where you prefer to replace rows with duplicate primary 
key values,
+      rather than discarding the new data, you can use the 
<codeph>UPSERT</codeph>
+      statement instead of <codeph>INSERT</codeph>. <codeph>UPSERT</codeph> 
inserts
+      rows that are entirely new, and for rows that match an existing primary 
key in the
+      table, the non-primary-key columns are updated to reflect the values in 
the
+      <q>upserted</q> data.
+    </p>
+
+    <p>
+      If you really want to store new rows, not replace existing ones, but 
cannot do so
+      because of the primary key uniqueness constraint, consider recreating 
the table
+      with additional columns included in the primary key.
+    </p>
+
+    <p>
+      See <xref href="impala_kudu.xml#impala_kudu"/> for more details about 
using Impala with Kudu.
     </p>
 
     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc721fbd/docs/topics/impala_update.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_update.xml b/docs/topics/impala_update.xml
index 3f78a17..0d3250b 100644
--- a/docs/topics/impala_update.xml
+++ b/docs/topics/impala_update.xml
@@ -18,7 +18,7 @@ specific language governing permissions and limitations
 under the License.
 -->
 <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept id="update">
+<concept id="update" rev="kudu">
 
   <title>UPDATE Statement (<keyword keyref="impala28"/> or higher only)</title>
   <titlealts audience="PDF"><navtitle>UPDATE</navtitle></titlealts>
@@ -39,43 +39,141 @@ under the License.
 
     <p>
       <indexterm audience="hidden">UPDATE statement</indexterm>
-      Updates one or more rows from a Kudu table.
-      Although updating a single row or a range of rows would be inefficient 
for tables using HDFS
-      data files, Kudu is able to perform this operation efficiently. 
Therefore, this statement
-      only works for Impala tables that use the Kudu storage engine.
+      Updates an arbitrary number of rows in a Kudu table.
+      This statement only works for Impala tables that use the Kudu storage 
engine.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
 <codeblock>
+UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET 
<varname>col</varname> = <varname>val</varname> [, <varname>col</varname> = 
<varname>val</varname> ... ]
+  [ FROM <varname>joined_table_refs</varname> ]
+  [ WHERE <varname>where_conditions</varname> ]
 </codeblock>
 
-    <p rev="kudu">
-      Normally, an <codeph>UPDATE</codeph> operation for a Kudu table fails if
-      some partition key columns are not found, due to their being deleted or 
changed
-      by a concurrent <codeph>UPDATE</codeph> or <codeph>DELETE</codeph> 
operation.
-      Specify <codeph>UPDATE IGNORE 
<varname>rest_of_statement</varname></codeph> to
-      make the <codeph>UPDATE</codeph> continue in this case. The rows with 
the nonexistent
-      duplicate partition key column values are not changed.
+    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+    <p>
+      None of the columns that make up the primary key can be updated by the
+      <codeph>SET</codeph> clause.
     </p>
 
-    <p conref="../shared/impala_common.xml#common/dml_blurb"/>
+    <p>
+      The conditions in the <codeph>WHERE</codeph> clause are the same ones 
allowed
+      for the <codeph>SELECT</codeph> statement. See <xref 
href="impala_select.xml#select"/>
+      for details.
+    </p>
 
-    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+    <p>
+      If the <codeph>WHERE</codeph> clause is omitted, all rows in the table 
are updated.
+    </p>
+
+    <p>
+      The conditions in the <codeph>WHERE</codeph> clause can refer to
+      any combination of primary key columns or other columns. Referring to
+      primary key columns in the <codeph>WHERE</codeph> clause is more 
efficient
+      than referring to non-primary key columns.
+    </p>
+
+    <p>
+      Because Kudu currently does not enforce strong consistency during 
concurrent DML operations,
+      be aware that the results after this statement finishes might be 
different than you
+      intuitively expect:
+    </p>
+    <ul>
+      <li>
+        <p>
+          If some rows cannot be updated because their
+          some primary key columns are not found, due to their being deleted
+          by a concurrent <codeph>DELETE</codeph> operation,
+          the statement succeeds but returns a warning.
+        </p>
+      </li>
+      <li>
+        <p>
+          An <codeph>UPDATE</codeph> statement might also overlap with
+          <codeph>INSERT</codeph>, <codeph>UPDATE</codeph>,
+          or <codeph>UPSERT</codeph> statements running concurrently on the 
same table.
+          After the statement finishes, there might be more or fewer matching 
rows than expected
+          in the table because it is undefined whether the 
<codeph>UPDATE</codeph> applies to rows
+          that are inserted or updated while the <codeph>UPDATE</codeph> is in 
progress.
+        </p>
+      </li>
+    </ul>
+
+    <p>
+      The number of affected rows is reported in an 
<cmdname>impala-shell</cmdname> message
+      and in the query profile.
+    </p>
+
+    <p>
+      The optional <codeph>FROM</codeph> clause lets you restrict the
+      updates to only the rows in the specified table that are part
+      of the result set for a join query. The join clauses can include
+      non-Kudu tables, but the table from which the rows are deleted
+      must be a Kudu table.
+    </p>
 
-    <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+    <p conref="../shared/impala_common.xml#common/dml_blurb_kudu"/>
 
     <note conref="../shared/impala_common.xml#common/compute_stats_next"/>
 
     <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      The following examples show how to perform a simple update
+      on a table, with or without a <codeph>WHERE</codeph> clause:
+    </p>
+
 <codeblock>
+-- Set all rows to the same value for column c3.
+-- In this case, c1 and c2 are primary key columns
+-- and so cannot be updated.
+UPDATE kudu_table SET c3 = 'not applicable';
 
+-- Update only the rows that match the condition.
+UPDATE kudu_table SET c3 = NULL WHERE c1 > 100 AND c3 IS NULL;
+
+-- Does not update any rows, because the WHERE condition is always false.
+UPDATE kudu_table SET c3 = 'impossible' WHERE 1 = 0;
+
+-- Change the values of multiple columns in a single UPDATE statement.
+UPDATE kudu_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE;
+</codeblock>
+
+    <p>
+      The following examples show how to perform an update using the
+      <codeph>FROM</codeph> keyword with a join clause:
+    </p>
+
+<codeblock>
+-- Uppercase a column value, only for rows that have
+-- an ID that matches the value from another table.
+UPDATE kudu_table SET c3 = upper(c3)
+  FROM kudu_table JOIN non_kudu_table
+  ON kudu_table.id = non_kudu_table.id;
+
+-- Same effect as previous statement.
+-- Assign table aliases in FROM clause, then refer to
+-- short names elsewhere in the statement.
+UPDATE t1 SET c3 = upper(c3)
+  FROM kudu_table t1 JOIN non_kudu_table t2
+  ON t1.id = t2.id;
+
+-- Same effect as previous statements, but more efficient.
+-- Use WHERE clause to skip updating values that are
+-- already uppercase.
+UPDATE t1 SET c3 = upper(c3)
+  FROM kudu_table t1 JOIN non_kudu_table t2
+  ON t1.id = t2.id
+  WHERE c3 != upper(c3);
 </codeblock>
 
     <p conref="../shared/impala_common.xml#common/related_info"/>
 
     <p>
-      <xref href="impala_kudu.xml#impala_kudu"/>
+      <xref href="impala_kudu.xml#impala_kudu"/>, <xref 
href="impala_insert.xml#insert"/>,
+      <xref href="impala_delete.xml#delete"/>, <xref 
href="impala_upsert.xml#upsert"/>
     </p>
 
   </conbody>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc721fbd/docs/topics/impala_upsert.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_upsert.xml b/docs/topics/impala_upsert.xml
index d4e880f..5830675 100644
--- a/docs/topics/impala_upsert.xml
+++ b/docs/topics/impala_upsert.xml
@@ -18,7 +18,7 @@ specific language governing permissions and limitations
 under the License.
 -->
 <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept id="upsert">
+<concept id="upsert" rev="kudu IMPALA-3725">
 
   <title>UPSERT Statement (<keyword keyref="impala28"/> or higher only)</title>
   <titlealts audience="PDF"><navtitle>UPSERT</navtitle></titlealts>
@@ -62,28 +62,54 @@ under the License.
     </p>
 
     <p>
-      Although inserting or updating a single row or a small set of rows would 
be inefficient for tables using HDFS
-      data files, Kudu is able to perform this operation efficiently.
-      Therefore, this statement only works for Impala tables that use the
-      Kudu storage engine.
+      This statement only works for Impala tables that use the Kudu storage 
engine.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
 <codeblock>
+UPSERT INTO [TABLE] [<varname>db_name</varname>.]<varname>table_name</varname>
+  [(<varname>column_list</varname>)]
+{
+    [<varname>hint_clause</varname>] <varname>select_statement</varname>
+  | VALUES (<varname>value</varname> [, <varname>value</varname> ...]) [, 
(<varname>value</varname> [, <varname>value</varname> ...]) ...]
+}
+
+hint_clause ::= [SHUFFLE] | [NOSHUFFLE]
+  (Note: the square brackets are part of the syntax.)
 </codeblock>
 
-    <p conref="../shared/impala_common.xml#common/dml_blurb"/>
+    <p>
+      The <varname>select_statement</varname> clause can use the full syntax, 
such as
+      <codeph>WHERE</codeph> and join clauses, as <xref 
href="impala_select.xml#select"/>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/dml_blurb_kudu"/>
 
     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
 
-    <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
+    <p>
+      If you specify a column list, any omitted columns in the inserted or 
updated rows are
+      set to their default value (if the column has one) or 
<codeph>NULL</codeph> (if the
+      column does not have a default value). Therefore, if a column is not 
nullable and
+      has no default value, it must be included in the column list for any 
<codeph>UPSERT</codeph>
+      statement. Because all primary key columns meet these conditions, all 
the primary key
+      columns must be specified in every <codeph>UPSERT</codeph> statement.
+    </p>
+
+    <p>
+      Because Kudu tables can efficiently handle small incremental changes, 
the <codeph>VALUES</codeph>
+      clause is more practical to use with Kudu tables than with HDFS-based 
tables.
+    </p>
 
     <note conref="../shared/impala_common.xml#common/compute_stats_next"/>
 
     <p conref="../shared/impala_common.xml#common/example_blurb"/>
-<codeblock>
 
+<codeblock>
+UPSERT INTO kudu_table (pk, c1, c2, c3) VALUES (0, 'hello', 50, true), (1, 
'world', -1, false);
+UPSERT INTO production_table SELECT * FROM staging_table;
+UPSERT INTO production_table SELECT * FROM staging_table WHERE c1 IS NOT NULL 
AND c2 > 0;
 </codeblock>
 
     <p conref="../shared/impala_common.xml#common/related_info"/>

[1/7] incubator-impala git commit: Updates to DML statements for Impala + Kudu

Reply via email to