This is an automated email from the ASF dual-hosted git repository. boroknagyz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push: new dea8546d8 IMPALA-12653: Update documentation about the UPDATE statement dea8546d8 is described below commit dea8546d80323f706d48c5f3ccc310b9ccbd1eae Author: Zoltan Borok-Nagy <borokna...@cloudera.com> AuthorDate: Tue Dec 19 16:00:59 2023 +0100 IMPALA-12653: Update documentation about the UPDATE statement This patch adds documentation about the UPDATE statement. Change-Id: I2a4f3dcdba5faaa7dffda60b8590d09e6a92a165 Reviewed-on: http://gerrit.cloudera.org:8080/20818 Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Reviewed-by: Noemi Pap-Takacs <npaptak...@cloudera.com> Reviewed-by: Andrew Sherman <asher...@cloudera.com> --- docs/impala_keydefs.ditamap | 1 + docs/topics/impala_iceberg.xml | 32 +++++++++++++++++++++++-- docs/topics/impala_update.xml | 53 +++++++++++++++++++++++------------------- 3 files changed, 60 insertions(+), 26 deletions(-) diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap index ca1582410..f97d10a6f 100644 --- a/docs/impala_keydefs.ditamap +++ b/docs/impala_keydefs.ditamap @@ -10529,6 +10529,7 @@ under the License. <keydef href="https://issues.apache.org/jira/browse/IMPALA-9999" scope="external" format="html" keys="IMPALA-9999"/> <!-- Short form of mapping from Impala release to vendor-specific releases, for use in headings. --> + <keydef keys="impala44"><topicmeta><keywords><keyword>Impala 4.4</keyword></keywords></topicmeta></keydef> <keydef keys="impala43"><topicmeta><keywords><keyword>Impala 4.3</keyword></keywords></topicmeta></keydef> <keydef keys="impala42"><topicmeta><keywords><keyword>Impala 4.2</keyword></keywords></topicmeta></keydef> <keydef keys="impala41"><topicmeta><keywords><keyword>Impala 4.1</keyword></keywords></topicmeta></keydef> diff --git a/docs/topics/impala_iceberg.xml b/docs/topics/impala_iceberg.xml index 66dd7fd8c..fce5aaa76 100644 --- a/docs/topics/impala_iceberg.xml +++ b/docs/topics/impala_iceberg.xml @@ -494,8 +494,36 @@ DELETE FROM ice_t where i = 3; <title>Updating data int Iceberg tables</title> <conbody> <p> - Impala does not yet support the UPDATE statement. One can mimic its behaviour by calling - DELETE + INSERT statements one after the other, but please note that this creates two transactions. + Since <keyword keyref="impala44"/> Impala is able to run <codeph>UPDATE</codeph> statements against + Iceberg V2 tables. E.g.: + <codeblock> +UPDATE ice_t SET val = val + 1; +UPDATE ice_t SET k = 4 WHERE i = 5; +UPDATE ice_t SET ice_t.k = o.k, ice_t.j = o.j, FROM ice_t, other_table o where ice_t.id = o.id; + </codeblock> + </p> + <p> + The UPDATE FROM statement can be used to update a target Iceberg table based on a source table (or view) that doesn't need + to be an Iceberg table. If there are multiple matches on the JOIN condition, Impala will raise an error. + </p> + <p> + Limitations: + <ul> + <li>Only the merge-on-read update mode is supported.</li> + <li>Only writes position delete files, i.e. no support for writing equality deletes.</li> + <li>Cannot update tables with complex types.</li> + <li> + Can only write data and delete files in Parquet format. This means if table properties 'write.format.default' + and 'write.delete.format.default' are set, their values must be PARQUET. + </li> + <li> + Updating partitioning column with non-constant expression via the UPDATE FROM statement is not allowed. + The upcoming MERGE statement will not have this limitation. + </li> + </ul> + </p> + <p> + More information about the <codeph>UPDATE</codeph> statement can be found at <xref href="impala_update.xml#update"/>. </p> </conbody> </concept> diff --git a/docs/topics/impala_update.xml b/docs/topics/impala_update.xml index 0d3250b71..598a81f4e 100644 --- a/docs/topics/impala_update.xml +++ b/docs/topics/impala_update.xml @@ -27,6 +27,7 @@ under the License. <data name="Category" value="Impala"/> <data name="Category" value="SQL"/> <data name="Category" value="Kudu"/> + <data name="Category" value="Iceberg"/> <data name="Category" value="ETL"/> <data name="Category" value="Ingest"/> <data name="Category" value="DML"/> @@ -39,8 +40,8 @@ under the License. <p> <indexterm audience="hidden">UPDATE statement</indexterm> - Updates an arbitrary number of rows in a Kudu table. - This statement only works for Impala tables that use the Kudu storage engine. + This statement only works for Kudu and Iceberg tables. + Updates an arbitrary number of rows in a target table. </p> <p conref="../shared/impala_common.xml#common/syntax_blurb"/> @@ -68,6 +69,23 @@ UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET <var If the <codeph>WHERE</codeph> clause is omitted, all rows in the table are updated. </p> + <p> + The number of affected rows is reported in an <cmdname>impala-shell</cmdname> message + and in the query profile. + </p> + + <p> + The optional <codeph>FROM</codeph> clause lets you restrict the + updates to only the rows in the specified table that are part + of the result set for a join query. The join clauses can include + tables with any format, but the table from which the rows are deleted + must be a Kudu or Iceberg table. + </p> + + + <p> + <b>Kudu considerations</b> + </p> <p> The conditions in the <codeph>WHERE</codeph> clause can refer to any combination of primary key columns or other columns. Referring to @@ -101,19 +119,6 @@ UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET <var </li> </ul> - <p> - The number of affected rows is reported in an <cmdname>impala-shell</cmdname> message - and in the query profile. - </p> - - <p> - The optional <codeph>FROM</codeph> clause lets you restrict the - updates to only the rows in the specified table that are part - of the result set for a join query. The join clauses can include - non-Kudu tables, but the table from which the rows are deleted - must be a Kudu table. - </p> - <p conref="../shared/impala_common.xml#common/dml_blurb_kudu"/> <note conref="../shared/impala_common.xml#common/compute_stats_next"/> @@ -129,16 +134,16 @@ UPDATE [<varname>database_name</varname>.]<varname>table_name</varname> SET <var -- Set all rows to the same value for column c3. -- In this case, c1 and c2 are primary key columns -- and so cannot be updated. -UPDATE kudu_table SET c3 = 'not applicable'; +UPDATE target_table SET c3 = 'not applicable'; -- Update only the rows that match the condition. -UPDATE kudu_table SET c3 = NULL WHERE c1 > 100 AND c3 IS NULL; +UPDATE target_table SET c3 = NULL WHERE c1 > 100 AND c3 IS NULL; -- Does not update any rows, because the WHERE condition is always false. -UPDATE kudu_table SET c3 = 'impossible' WHERE 1 = 0; +UPDATE target_table SET c3 = 'impossible' WHERE 1 = 0; -- Change the values of multiple columns in a single UPDATE statement. -UPDATE kudu_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE; +UPDATE target_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE; </codeblock> <p> @@ -149,22 +154,22 @@ UPDATE kudu_table SET c3 = upper(c3), c4 = FALSE, c5 = 0 WHERE c6 = TRUE; <codeblock> -- Uppercase a column value, only for rows that have -- an ID that matches the value from another table. -UPDATE kudu_table SET c3 = upper(c3) - FROM kudu_table JOIN non_kudu_table - ON kudu_table.id = non_kudu_table.id; +UPDATE target_table SET c3 = upper(c3) + FROM target_table JOIN other_table + ON target_table.id = other_table.id; -- Same effect as previous statement. -- Assign table aliases in FROM clause, then refer to -- short names elsewhere in the statement. UPDATE t1 SET c3 = upper(c3) - FROM kudu_table t1 JOIN non_kudu_table t2 + FROM target_table t1 JOIN other_table t2 ON t1.id = t2.id; -- Same effect as previous statements, but more efficient. -- Use WHERE clause to skip updating values that are -- already uppercase. UPDATE t1 SET c3 = upper(c3) - FROM kudu_table t1 JOIN non_kudu_table t2 + FROM target_table t1 JOIN other_table t2 ON t1.id = t2.id WHERE c3 != upper(c3); </codeblock>