[hive] branch master updated: HIVE-26521: Iceberg: Raise exception when running delete/update statements on V1 tables (#3579) (Laszlo Pinter, reviewed by Adam Szita)

lpinter Fri, 16 Sep 2022 06:38:24 -0700

This is an automated email from the ASF dual-hosted git repository.

lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new f232be7879 HIVE-26521: Iceberg: Raise exception when running delete/update statements on V1 tables (#3579) (Laszlo Pinter, reviewed by Adam Szita)
f232be7879 is described below

commit f232be787943ef903218532e319b9df7d495f4c7
Author: László Pintér <[email protected]>
AuthorDate: Fri Sep 16 15:37:59 2022 +0200

    HIVE-26521: Iceberg: Raise exception when running delete/update statements on V1 tables (#3579) (Laszlo Pinter, reviewed by Adam Szita)
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  8 ++-
 .../apache/iceberg/mr/hive/TestHiveIcebergV2.java  | 35 ++++++++++++-
 .../results/negative/merge_split_update_off.q.out  |  2 +-
 .../results/negative/update_split_update_off.q.out |  2 +-
 .../llap/vectorized_iceberg_read_mixed.q.out       | 58 ++++++++++------------
 .../llap/vectorized_iceberg_read_orc.q.out         | 58 ++++++++++------------
 .../llap/vectorized_iceberg_read_parquet.q.out     | 58 ++++++++++------------
 .../positive/vectorized_iceberg_read_mixed.q.out   | 30 +++++------
 .../positive/vectorized_iceberg_read_orc.q.out     | 30 +++++------
 .../positive/vectorized_iceberg_read_parquet.q.out | 30 +++++------
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java    |  4 +-
 .../hive/ql/metadata/HiveStorageHandler.java       | 11 ++--
 .../HiveRelOptMaterializationValidator.java        |  5 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  2 +-
 14 files changed, 168 insertions(+), 165 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index e7dcbf67ef..a050b0456a 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -574,8 +574,12 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   }
 
   @Override
-  public AcidSupportType supportsAcidOperations() {
-    return AcidSupportType.WITHOUT_TRANSACTIONS;
+  public AcidSupportType supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table) {
+    if (table.getParameters() != null && "2".equals(table.getParameters().get(TableProperties.FORMAT_VERSION))) {
+      return AcidSupportType.WITHOUT_TRANSACTIONS;
+    }
+
+    return AcidSupportType.NONE;
   }
 
   @Override
diff --git 
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
 
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
index 016fea5a09..ed06496741 100644
--- 
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
+++ 
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
@@ -22,6 +22,7 @@ package org.apache.iceberg.mr.hive;
 import java.io.IOException;
 import java.util.List;
 import java.util.stream.StreamSupport;
+import org.apache.iceberg.AssertHelpers;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.DeleteFile;
 import org.apache.iceberg.FileFormat;
@@ -376,7 +377,8 @@ public class TestHiveIcebergV2 extends 
HiveIcebergStorageHandlerWithEngineBase {
 
       Schema schema = new Schema(required(1, columnName, type));
       List<Record> records = TestHelper.generateRandomRecords(schema, 1, 0L);
-      Table table = testTables.createTable(shell, tableName, schema, 
fileFormat, records, 2);
+      Table table = testTables.createTable(shell, tableName, schema, 
PartitionSpec.unpartitioned(), fileFormat, records,
+          2);
 
       shell.executeStatement("DELETE FROM " + tableName);
       HiveIcebergTestUtils.validateData(table, ImmutableList.of(), 0);
@@ -555,7 +557,8 @@ public class TestHiveIcebergV2 extends 
HiveIcebergStorageHandlerWithEngineBase {
 
       Schema schema = new Schema(required(1, columnName, type));
       List<Record> originalRecords = TestHelper.generateRandomRecords(schema, 
1, 0L);
-      Table table = testTables.createTable(shell, tableName, schema, 
fileFormat, originalRecords, 2);
+      Table table = testTables.createTable(shell, tableName, schema, 
PartitionSpec.unpartitioned(), fileFormat,
+          originalRecords, 2);
 
       List<Record> newRecords = TestHelper.generateRandomRecords(schema, 1, 
3L);
       shell.executeStatement(testTables.getUpdateQuery(tableName, 
newRecords.get(0)));
@@ -563,6 +566,34 @@ public class TestHiveIcebergV2 extends 
HiveIcebergStorageHandlerWithEngineBase {
     }
   }
 
+  @Test
+  public void testDeleteStatementFormatV1() {
+    // create and insert an initial batch of records
+    testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+        PartitionSpec.unpartitioned(), fileFormat, HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2);
+    // insert one more batch so that we have multiple data files within the same partition
+    shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1,
+        TableIdentifier.of("default", "customers"), false));
+    AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
+        "Attempt to do update or delete on table", () -> {
+          shell.executeStatement("DELETE FROM customers WHERE customer_id=3 or first_name='Joanna'");
+        });
+  }
+
+  @Test
+  public void testUpdateStatementFormatV1() {
+    // create and insert an initial batch of records
+    testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+        PartitionSpec.unpartitioned(), fileFormat, HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2);
+    // insert one more batch so that we have multiple data files within the same partition
+    shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1,
+        TableIdentifier.of("default", "customers"), false));
+    AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
+        "Attempt to do update or delete on table", () -> {
+          shell.executeStatement("UPDATE customers SET last_name='Changed' WHERE customer_id=3 or first_name='Joanna'");
+        });
+  }
+
   private static <T> PositionDelete<T> positionDelete(CharSequence path, long 
pos, T row) {
     PositionDelete<T> positionDelete = PositionDelete.create();
     return positionDelete.set(path, pos, row);
diff --git 
a/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
 
b/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
index b89353d661..61be29b091 100644
--- 
a/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
@@ -22,4 +22,4 @@ POSTHOOK: query: create external table test_merge_source (a 
int, b string, c int
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@test_merge_source
-FAILED: SemanticException [Error 10435]: Update and Merge into non-native ACID 
table is only supported when hive.split.update is true.
+FAILED: SemanticException [Error 10294]: Attempt to do update or delete using 
transaction manager that does not support these operations.
diff --git 
a/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
 
b/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
index 270d6ee09b..1c1aa42be6 100644
--- 
a/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
@@ -10,4 +10,4 @@ POSTHOOK: query: create external table test_update (id int, 
value string) stored
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@test_update
-FAILED: SemanticException [Error 10435]: Update and Merge into non-native ACID 
table is only supported when hive.split.update is true.
+FAILED: SemanticException [Error 10294]: Attempt to do update or delete using 
transaction manager that does not support these operations.
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
index e173ea1c84..b3d9d7f5e8 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
@@ -69,24 +69,20 @@ STAGE PLANS:
                 TableScan
                   alias: tbl_ice_mixed
                   Statistics: Num rows: 19 Data size: 1748 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: a (type: int), b (type: string)
-                    outputColumnNames: a, b
-                    Statistics: Num rows: 19 Data size: 1748 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: max(a)
-                      keys: b (type: string)
-                      minReductionHashAggr: 0.4736842
-                      mode: hash
-                      outputColumnNames: _col0, _col1
+                  Group By Operator
+                    aggregations: max(a)
+                    keys: b (type: string)
+                    minReductionHashAggr: 0.4736842
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 10 Data size: 920 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 10 Data size: 920 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        null sort order: z
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 10 Data size: 920 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: int)
+                      value expressions: _col1 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
         Reducer 2 
@@ -211,24 +207,20 @@ STAGE PLANS:
                 TableScan
                   alias: tbl_ice_mixed_all_types
                   Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Select Operator
-                    expressions: t_float (type: float), t_double (type: 
double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), 
t_binary (type: binary), t_string (type: string), t_timestamp (type: 
timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
-                    outputColumnNames: t_float, t_double, t_boolean, t_int, 
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+                  Group By Operator
+                    aggregations: max(t_float)
+                    keys: t_double (type: double), t_boolean (type: boolean), 
t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string 
(type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal 
(type: decimal(4,2))
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
                     Statistics: Num rows: 2 Data size: 746 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: max(t_float)
-                      keys: t_double (type: double), t_boolean (type: 
boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), 
t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), 
t_decimal (type: decimal(4,2))
-                      minReductionHashAggr: 0.99
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
+                    Reduce Output Operator
+                      key expressions: _col0 (type: double), _col1 (type: 
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: 
decimal(4,2))
+                      null sort order: zzzzzzzzz
+                      sort order: +++++++++
+                      Map-reduce partition columns: _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
                       Statistics: Num rows: 2 Data size: 746 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: double), _col1 (type: 
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: 
decimal(4,2))
-                        null sort order: zzzzzzzzz
-                        sort order: +++++++++
-                        Map-reduce partition columns: _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
-                        Statistics: Num rows: 2 Data size: 746 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col9 (type: float)
+                      value expressions: _col9 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
         Reducer 2 
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
index 6b2a8780d6..dbfa9b6c90 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
@@ -53,24 +53,20 @@ STAGE PLANS:
                 TableScan
                   alias: tbl_ice_orc
                   Statistics: Num rows: 10 Data size: 920 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: a (type: int), b (type: string)
-                    outputColumnNames: a, b
-                    Statistics: Num rows: 10 Data size: 920 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: max(a)
-                      keys: b (type: string)
-                      minReductionHashAggr: 0.5
-                      mode: hash
-                      outputColumnNames: _col0, _col1
+                  Group By Operator
+                    aggregations: max(a)
+                    keys: b (type: string)
+                    minReductionHashAggr: 0.5
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 460 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 5 Data size: 460 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        null sort order: z
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 5 Data size: 460 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: int)
+                      value expressions: _col1 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
         Reducer 2 
@@ -174,24 +170,20 @@ STAGE PLANS:
                 TableScan
                   alias: tbl_ice_orc_all_types
                   Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Select Operator
-                    expressions: t_float (type: float), t_double (type: 
double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), 
t_binary (type: binary), t_string (type: string), t_timestamp (type: 
timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
-                    outputColumnNames: t_float, t_double, t_boolean, t_int, 
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+                  Group By Operator
+                    aggregations: max(t_float)
+                    keys: t_double (type: double), t_boolean (type: boolean), 
t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string 
(type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal 
(type: decimal(4,2))
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
                     Statistics: Num rows: 1 Data size: 372 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: max(t_float)
-                      keys: t_double (type: double), t_boolean (type: 
boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), 
t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), 
t_decimal (type: decimal(4,2))
-                      minReductionHashAggr: 0.99
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
+                    Reduce Output Operator
+                      key expressions: _col0 (type: double), _col1 (type: 
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: 
decimal(4,2))
+                      null sort order: zzzzzzzzz
+                      sort order: +++++++++
+                      Map-reduce partition columns: _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
                       Statistics: Num rows: 1 Data size: 372 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: double), _col1 (type: 
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: 
decimal(4,2))
-                        null sort order: zzzzzzzzz
-                        sort order: +++++++++
-                        Map-reduce partition columns: _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
-                        Statistics: Num rows: 1 Data size: 372 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col9 (type: float)
+                      value expressions: _col9 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
         Reducer 2 
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
index fce8eaa59f..ba31ed21cb 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
@@ -53,24 +53,20 @@ STAGE PLANS:
                 TableScan
                   alias: tbl_ice_parquet
                   Statistics: Num rows: 10 Data size: 920 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: a (type: int), b (type: string)
-                    outputColumnNames: a, b
-                    Statistics: Num rows: 10 Data size: 920 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: max(a)
-                      keys: b (type: string)
-                      minReductionHashAggr: 0.5
-                      mode: hash
-                      outputColumnNames: _col0, _col1
+                  Group By Operator
+                    aggregations: max(a)
+                    keys: b (type: string)
+                    minReductionHashAggr: 0.5
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 460 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 5 Data size: 460 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        null sort order: z
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 5 Data size: 460 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: int)
+                      value expressions: _col1 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
         Reducer 2 
@@ -174,24 +170,20 @@ STAGE PLANS:
                 TableScan
                   alias: tbl_ice_parquet_all_types
                   Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Select Operator
-                    expressions: t_float (type: float), t_double (type: 
double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), 
t_binary (type: binary), t_string (type: string), t_timestamp (type: 
timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
-                    outputColumnNames: t_float, t_double, t_boolean, t_int, 
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+                  Group By Operator
+                    aggregations: max(t_float)
+                    keys: t_double (type: double), t_boolean (type: boolean), 
t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string 
(type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal 
(type: decimal(4,2))
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
                     Statistics: Num rows: 1 Data size: 372 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: max(t_float)
-                      keys: t_double (type: double), t_boolean (type: 
boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), 
t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), 
t_decimal (type: decimal(4,2))
-                      minReductionHashAggr: 0.99
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
+                    Reduce Output Operator
+                      key expressions: _col0 (type: double), _col1 (type: 
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: 
decimal(4,2))
+                      null sort order: zzzzzzzzz
+                      sort order: +++++++++
+                      Map-reduce partition columns: _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
                       Statistics: Num rows: 1 Data size: 372 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: double), _col1 (type: 
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: 
decimal(4,2))
-                        null sort order: zzzzzzzzz
-                        sort order: +++++++++
-                        Map-reduce partition columns: _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
-                        Statistics: Num rows: 1 Data size: 372 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col9 (type: float)
+                      value expressions: _col9 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
         Reducer 2 
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
index 2701494205..e6d990caa4 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
@@ -62,18 +62,16 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_11]
-        Group By Operator [GBY_10] (rows=10 width=92)
+      File Output Operator [FS_10]
+        Group By Operator [GBY_9] (rows=10 width=92)
           
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)"],keys:KEY._col0
         <-Map 1 [SIMPLE_EDGE] vectorized
-          SHUFFLE [RS_9]
+          SHUFFLE [RS_8]
             PartitionCols:_col0
-            Group By Operator [GBY_8] (rows=10 width=92)
+            Group By Operator [GBY_7] (rows=10 width=92)
               Output:["_col0","_col1"],aggregations:["max(a)"],keys:b
-              Select Operator [SEL_7] (rows=19 width=92)
-                Output:["a","b"]
-                TableScan [TS_0] (rows=19 width=92)
-                  
default@tbl_ice_mixed,tbl_ice_mixed,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+              TableScan [TS_0] (rows=19 width=92)
+                
default@tbl_ice_mixed,tbl_ice_mixed,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
 
 PREHOOK: query: select b, max(a) from tbl_ice_mixed group by b
 PREHOOK: type: QUERY
@@ -167,20 +165,18 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_12]
-        Select Operator [SEL_11] (rows=2 width=373)
+      File Output Operator [FS_11]
+        Select Operator [SEL_10] (rows=2 width=373)
           
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
-          Group By Operator [GBY_10] (rows=2 width=373)
+          Group By Operator [GBY_9] (rows=2 width=373)
             
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, 
KEY._col8
           <-Map 1 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_9]
+            SHUFFLE [RS_8]
               PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8
-              Group By Operator [GBY_8] (rows=2 width=373)
+              Group By Operator [GBY_7] (rows=2 width=373)
                 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double,
 t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
-                Select Operator [SEL_7] (rows=2 width=373)
-                  
Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
-                  TableScan [TS_0] (rows=2 width=373)
-                    
default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+                TableScan [TS_0] (rows=2 width=373)
+                  
default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
 
 PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, 
t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
         group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
index e6b1ceb69b..91c7bb2c54 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
@@ -46,18 +46,16 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_11]
-        Group By Operator [GBY_10] (rows=5 width=92)
+      File Output Operator [FS_10]
+        Group By Operator [GBY_9] (rows=5 width=92)
           
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)"],keys:KEY._col0
         <-Map 1 [SIMPLE_EDGE] vectorized
-          SHUFFLE [RS_9]
+          SHUFFLE [RS_8]
             PartitionCols:_col0
-            Group By Operator [GBY_8] (rows=5 width=92)
+            Group By Operator [GBY_7] (rows=5 width=92)
               Output:["_col0","_col1"],aggregations:["max(a)"],keys:b
-              Select Operator [SEL_7] (rows=10 width=92)
-                Output:["a","b"]
-                TableScan [TS_0] (rows=10 width=92)
-                  
default@tbl_ice_orc,tbl_ice_orc,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+              TableScan [TS_0] (rows=10 width=92)
+                
default@tbl_ice_orc,tbl_ice_orc,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
 
 PREHOOK: query: select b, max(a) from tbl_ice_orc group by b
 PREHOOK: type: QUERY
@@ -130,20 +128,18 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_12]
-        Select Operator [SEL_11] (rows=1 width=372)
+      File Output Operator [FS_11]
+        Select Operator [SEL_10] (rows=1 width=372)
           
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
-          Group By Operator [GBY_10] (rows=1 width=372)
+          Group By Operator [GBY_9] (rows=1 width=372)
             
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, 
KEY._col8
           <-Map 1 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_9]
+            SHUFFLE [RS_8]
               PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8
-              Group By Operator [GBY_8] (rows=1 width=372)
+              Group By Operator [GBY_7] (rows=1 width=372)
                 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double,
 t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
-                Select Operator [SEL_7] (rows=1 width=372)
-                  
Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
-                  TableScan [TS_0] (rows=1 width=372)
-                    
default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+                TableScan [TS_0] (rows=1 width=372)
+                  
default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
 
 PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, 
t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_orc_all_types
         group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
index 34faa886d7..43da19476b 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
@@ -46,18 +46,16 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_11]
-        Group By Operator [GBY_10] (rows=5 width=92)
+      File Output Operator [FS_10]
+        Group By Operator [GBY_9] (rows=5 width=92)
           
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)"],keys:KEY._col0
         <-Map 1 [SIMPLE_EDGE] vectorized
-          SHUFFLE [RS_9]
+          SHUFFLE [RS_8]
             PartitionCols:_col0
-            Group By Operator [GBY_8] (rows=5 width=92)
+            Group By Operator [GBY_7] (rows=5 width=92)
               Output:["_col0","_col1"],aggregations:["max(a)"],keys:b
-              Select Operator [SEL_7] (rows=10 width=92)
-                Output:["a","b"]
-                TableScan [TS_0] (rows=10 width=92)
-                  
default@tbl_ice_parquet,tbl_ice_parquet,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+              TableScan [TS_0] (rows=10 width=92)
+                
default@tbl_ice_parquet,tbl_ice_parquet,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
 
 PREHOOK: query: select b, max(a) from tbl_ice_parquet group by b
 PREHOOK: type: QUERY
@@ -130,20 +128,18 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_12]
-        Select Operator [SEL_11] (rows=1 width=372)
+      File Output Operator [FS_11]
+        Select Operator [SEL_10] (rows=1 width=372)
           
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
-          Group By Operator [GBY_10] (rows=1 width=372)
+          Group By Operator [GBY_9] (rows=1 width=372)
             
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, 
KEY._col8
           <-Map 1 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_9]
+            SHUFFLE [RS_8]
               PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8
-              Group By Operator [GBY_8] (rows=1 width=372)
+              Group By Operator [GBY_7] (rows=1 width=372)
                 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double,
 t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
-                Select Operator [SEL_7] (rows=1 width=372)
-                  
Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
-                  TableScan [TS_0] (rows=1 width=372)
-                    
default@tbl_ice_parquet_all_types,tbl_ice_parquet_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+                TableScan [TS_0] (rows=1 width=372)
+                  
default@tbl_ice_parquet_all_types,tbl_ice_parquet_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
 
 PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, 
t_binary, t_string, t_timestamp, t_date, t_decimal from 
tbl_ice_parquet_all_types
         group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 4582878817..ca2516123c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -3364,7 +3364,7 @@ public class AcidUtils {
 
   public static boolean isNonNativeAcidTable(Table table) {
     return table != null && table.getStorageHandler() != null &&
-        table.getStorageHandler().supportsAcidOperations() != HiveStorageHandler.AcidSupportType.NONE;
+        table.getStorageHandler().supportsAcidOperations(table) != HiveStorageHandler.AcidSupportType.NONE;
   }
 
   /**
@@ -3386,7 +3386,7 @@ public class AcidUtils {
 
   public static boolean acidTableWithoutTransactions(Table table) {
     return table != null && table.getStorageHandler() != null &&
-        table.getStorageHandler().supportsAcidOperations() == HiveStorageHandler.AcidSupportType.WITHOUT_TRANSACTIONS;
+        table.getStorageHandler().supportsAcidOperations(table) == HiveStorageHandler.AcidSupportType.WITHOUT_TRANSACTIONS;
   }
 
   static class DirInfoValue {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 65e458cc65..bdfdf3fde3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -291,7 +291,7 @@ public interface HiveStorageHandler extends Configurable {
    *
    * @return the table's ACID support type
    */
-  default AcidSupportType supportsAcidOperations() {
+  default AcidSupportType supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table) {
     return AcidSupportType.NONE;
   }
 
@@ -299,7 +299,8 @@ public interface HiveStorageHandler extends Configurable {
    * Specifies which additional virtual columns should be added to the virtual column registry during compilation
    * for tables that support ACID operations.
    *
-   * Should only return a non-empty list if {@link HiveStorageHandler#supportsAcidOperations()} ()} returns something
+   * Should only return a non-empty list if
+   * {@link HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)} ()} returns something
    * other NONE.
    *
    * @return the list of ACID virtual columns
@@ -318,7 +319,8 @@ public interface HiveStorageHandler extends Configurable {
    *
    * This method specifies which columns should be injected into the &lt;selectCols&gt; part of the rewritten query.
    *
-   * Should only return a non-empty list if {@link HiveStorageHandler#supportsAcidOperations()} returns something
+   * Should only return a non-empty list if
+   * {@link HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)} returns something
    * other NONE.
    *
    * @param table the table which is being deleted/updated/merged into
@@ -336,7 +338,8 @@ public interface HiveStorageHandler extends Configurable {
    *
    * This method specifies which columns should be injected into the &lt;sortCols&gt; part of the rewritten query.
    *
-   * Should only return a non-empty list if {@link HiveStorageHandler#supportsAcidOperations()} returns something
+   * Should only return a non-empty list if
+   * {@link HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)} returns something
    * other NONE.
    *
    * @param table the table which is being deleted/updated/merged into
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
index 3897a2912a..d8f0a3041c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
@@ -89,8 +89,9 @@ public class HiveRelOptMaterializationValidator extends HiveRelShuttleImpl {
     if (tab.isTemporary()) {
       fail(tab.getTableName() + " is a temporary table");
     }
-    if (tab.getTableType() == TableType.EXTERNAL_TABLE && !AcidUtils.isNonNativeAcidTable(tab)) {
-      fail(tab.getFullyQualifiedName() + " is an external table");
+    if (tab.getTableType() == TableType.EXTERNAL_TABLE &&
+        !(tab.getStorageHandler() != null && tab.getStorageHandler().areSnapshotsSupported())) {
+      fail(tab.getFullyQualifiedName() + " is an external table and does not support snapshots");
     }
     return hiveScan;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 3e12bf4efd..2458cbb096 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -14222,7 +14222,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
           }
           if (AcidUtils.isTransactionalTable(table)) {
             ++nativeAcidCount;
-          } else if (AcidUtils.isNonNativeAcidTable(table) && table.getStorageHandler().areSnapshotsSupported()) {
+          } else if (table.isNonNative() && table.getStorageHandler().areSnapshotsSupported()) {
             ++supportsSnapshotCount;
           } else {
             throw new SemanticException("Automatic rewriting for materialized view cannot "

[hive] branch master updated: HIVE-26521: Iceberg: Raise exception when running delete/update statements on V1 tables (#3579) (Laszlo Pinter, reviewed by Adam Szita)

Reply via email to