This is an automated email from the ASF dual-hosted git repository.
lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f232be7879 HIVE-26521: Iceberg: Raise exception when running
delete/update statements on V1 tables (#3579) (Laszlo Pinter, reviewed by Adam
Szita)
f232be7879 is described below
commit f232be787943ef903218532e319b9df7d495f4c7
Author: László Pintér <[email protected]>
AuthorDate: Fri Sep 16 15:37:59 2022 +0200
HIVE-26521: Iceberg: Raise exception when running delete/update statements
on V1 tables (#3579) (Laszlo Pinter, reviewed by Adam Szita)
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 8 ++-
.../apache/iceberg/mr/hive/TestHiveIcebergV2.java | 35 ++++++++++++-
.../results/negative/merge_split_update_off.q.out | 2 +-
.../results/negative/update_split_update_off.q.out | 2 +-
.../llap/vectorized_iceberg_read_mixed.q.out | 58 ++++++++++------------
.../llap/vectorized_iceberg_read_orc.q.out | 58 ++++++++++------------
.../llap/vectorized_iceberg_read_parquet.q.out | 58 ++++++++++------------
.../positive/vectorized_iceberg_read_mixed.q.out | 30 +++++------
.../positive/vectorized_iceberg_read_orc.q.out | 30 +++++------
.../positive/vectorized_iceberg_read_parquet.q.out | 30 +++++------
.../org/apache/hadoop/hive/ql/io/AcidUtils.java | 4 +-
.../hive/ql/metadata/HiveStorageHandler.java | 11 ++--
.../HiveRelOptMaterializationValidator.java | 5 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +-
14 files changed, 168 insertions(+), 165 deletions(-)
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index e7dcbf67ef..a050b0456a 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -574,8 +574,12 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
}
@Override
- public AcidSupportType supportsAcidOperations() {
- return AcidSupportType.WITHOUT_TRANSACTIONS;
+ public AcidSupportType
supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table) {
+ if (table.getParameters() != null &&
"2".equals(table.getParameters().get(TableProperties.FORMAT_VERSION))) {
+ return AcidSupportType.WITHOUT_TRANSACTIONS;
+ }
+
+ return AcidSupportType.NONE;
}
@Override
diff --git
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
index 016fea5a09..ed06496741 100644
---
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
+++
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
@@ -22,6 +22,7 @@ package org.apache.iceberg.mr.hive;
import java.io.IOException;
import java.util.List;
import java.util.stream.StreamSupport;
+import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.FileFormat;
@@ -376,7 +377,8 @@ public class TestHiveIcebergV2 extends
HiveIcebergStorageHandlerWithEngineBase {
Schema schema = new Schema(required(1, columnName, type));
List<Record> records = TestHelper.generateRandomRecords(schema, 1, 0L);
- Table table = testTables.createTable(shell, tableName, schema,
fileFormat, records, 2);
+ Table table = testTables.createTable(shell, tableName, schema,
PartitionSpec.unpartitioned(), fileFormat, records,
+ 2);
shell.executeStatement("DELETE FROM " + tableName);
HiveIcebergTestUtils.validateData(table, ImmutableList.of(), 0);
@@ -555,7 +557,8 @@ public class TestHiveIcebergV2 extends
HiveIcebergStorageHandlerWithEngineBase {
Schema schema = new Schema(required(1, columnName, type));
List<Record> originalRecords = TestHelper.generateRandomRecords(schema,
1, 0L);
- Table table = testTables.createTable(shell, tableName, schema,
fileFormat, originalRecords, 2);
+ Table table = testTables.createTable(shell, tableName, schema,
PartitionSpec.unpartitioned(), fileFormat,
+ originalRecords, 2);
List<Record> newRecords = TestHelper.generateRandomRecords(schema, 1,
3L);
shell.executeStatement(testTables.getUpdateQuery(tableName,
newRecords.get(0)));
@@ -563,6 +566,34 @@ public class TestHiveIcebergV2 extends
HiveIcebergStorageHandlerWithEngineBase {
}
}
+ @Test
+ public void testDeleteStatementFormatV1() {
+ // create and insert an initial batch of records
+ testTables.createTable(shell, "customers",
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ PartitionSpec.unpartitioned(), fileFormat,
HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2);
+ // insert one more batch so that we have multiple data files within the
same partition
+
shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1,
+ TableIdentifier.of("default", "customers"), false));
+ AssertHelpers.assertThrows("should throw exception",
IllegalArgumentException.class,
+ "Attempt to do update or delete on table", () -> {
+ shell.executeStatement("DELETE FROM customers WHERE customer_id=3 or
first_name='Joanna'");
+ });
+ }
+
+ @Test
+ public void testUpdateStatementFormatV1() {
+ // create and insert an initial batch of records
+ testTables.createTable(shell, "customers",
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ PartitionSpec.unpartitioned(), fileFormat,
HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2);
+ // insert one more batch so that we have multiple data files within the
same partition
+
shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1,
+ TableIdentifier.of("default", "customers"), false));
+ AssertHelpers.assertThrows("should throw exception",
IllegalArgumentException.class,
+ "Attempt to do update or delete on table", () -> {
+ shell.executeStatement("UPDATE customers SET last_name='Changed'
WHERE customer_id=3 or first_name='Joanna'");
+ });
+ }
+
private static <T> PositionDelete<T> positionDelete(CharSequence path, long
pos, T row) {
PositionDelete<T> positionDelete = PositionDelete.create();
return positionDelete.set(path, pos, row);
diff --git
a/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
b/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
index b89353d661..61be29b091 100644
---
a/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
+++
b/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
@@ -22,4 +22,4 @@ POSTHOOK: query: create external table test_merge_source (a
int, b string, c int
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_merge_source
-FAILED: SemanticException [Error 10435]: Update and Merge into non-native ACID
table is only supported when hive.split.update is true.
+FAILED: SemanticException [Error 10294]: Attempt to do update or delete using
transaction manager that does not support these operations.
diff --git
a/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
b/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
index 270d6ee09b..1c1aa42be6 100644
---
a/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
+++
b/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
@@ -10,4 +10,4 @@ POSTHOOK: query: create external table test_update (id int,
value string) stored
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_update
-FAILED: SemanticException [Error 10435]: Update and Merge into non-native ACID
table is only supported when hive.split.update is true.
+FAILED: SemanticException [Error 10294]: Attempt to do update or delete using
transaction manager that does not support these operations.
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
index e173ea1c84..b3d9d7f5e8 100644
---
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
+++
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
@@ -69,24 +69,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_mixed
Statistics: Num rows: 19 Data size: 1748 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: a, b
- Statistics: Num rows: 19 Data size: 1748 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(a)
- keys: b (type: string)
- minReductionHashAggr: 0.4736842
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: max(a)
+ keys: b (type: string)
+ minReductionHashAggr: 0.4736842
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 920 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 10 Data size: 920 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 10 Data size: 920 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
+ value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
@@ -211,24 +207,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_mixed_all_types
Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: t_float (type: float), t_double (type:
double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint),
t_binary (type: binary), t_string (type: string), t_timestamp (type:
timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
- outputColumnNames: t_float, t_double, t_boolean, t_int,
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+ Group By Operator
+ aggregations: max(t_float)
+ keys: t_double (type: double), t_boolean (type: boolean),
t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string
(type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal
(type: decimal(4,2))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 2 Data size: 746 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(t_float)
- keys: t_double (type: double), t_boolean (type:
boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary),
t_string (type: string), t_timestamp (type: timestamp), t_date (type: date),
t_decimal (type: decimal(4,2))
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7, _col8, _col9
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type:
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type:
decimal(4,2))
+ null sort order: zzzzzzzzz
+ sort order: +++++++++
+ Map-reduce partition columns: _col0 (type: double),
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type:
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date),
_col8 (type: decimal(4,2))
Statistics: Num rows: 2 Data size: 746 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type:
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type:
decimal(4,2))
- null sort order: zzzzzzzzz
- sort order: +++++++++
- Map-reduce partition columns: _col0 (type: double),
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type:
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date),
_col8 (type: decimal(4,2))
- Statistics: Num rows: 2 Data size: 746 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col9 (type: float)
+ value expressions: _col9 (type: float)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
index 6b2a8780d6..dbfa9b6c90 100644
---
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
+++
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
@@ -53,24 +53,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_orc
Statistics: Num rows: 10 Data size: 920 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: a, b
- Statistics: Num rows: 10 Data size: 920 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(a)
- keys: b (type: string)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: max(a)
+ keys: b (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 460 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 5 Data size: 460 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 5 Data size: 460 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
+ value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
@@ -174,24 +170,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_orc_all_types
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: t_float (type: float), t_double (type:
double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint),
t_binary (type: binary), t_string (type: string), t_timestamp (type:
timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
- outputColumnNames: t_float, t_double, t_boolean, t_int,
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+ Group By Operator
+ aggregations: max(t_float)
+ keys: t_double (type: double), t_boolean (type: boolean),
t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string
(type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal
(type: decimal(4,2))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 372 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(t_float)
- keys: t_double (type: double), t_boolean (type:
boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary),
t_string (type: string), t_timestamp (type: timestamp), t_date (type: date),
t_decimal (type: decimal(4,2))
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7, _col8, _col9
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type:
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type:
decimal(4,2))
+ null sort order: zzzzzzzzz
+ sort order: +++++++++
+ Map-reduce partition columns: _col0 (type: double),
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type:
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date),
_col8 (type: decimal(4,2))
Statistics: Num rows: 1 Data size: 372 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type:
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type:
decimal(4,2))
- null sort order: zzzzzzzzz
- sort order: +++++++++
- Map-reduce partition columns: _col0 (type: double),
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type:
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date),
_col8 (type: decimal(4,2))
- Statistics: Num rows: 1 Data size: 372 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col9 (type: float)
+ value expressions: _col9 (type: float)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
index fce8eaa59f..ba31ed21cb 100644
---
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
+++
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
@@ -53,24 +53,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_parquet
Statistics: Num rows: 10 Data size: 920 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: a, b
- Statistics: Num rows: 10 Data size: 920 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(a)
- keys: b (type: string)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: max(a)
+ keys: b (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 460 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 5 Data size: 460 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 5 Data size: 460 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
+ value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
@@ -174,24 +170,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_parquet_all_types
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: t_float (type: float), t_double (type:
double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint),
t_binary (type: binary), t_string (type: string), t_timestamp (type:
timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
- outputColumnNames: t_float, t_double, t_boolean, t_int,
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+ Group By Operator
+ aggregations: max(t_float)
+ keys: t_double (type: double), t_boolean (type: boolean),
t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string
(type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal
(type: decimal(4,2))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 372 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(t_float)
- keys: t_double (type: double), t_boolean (type:
boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary),
t_string (type: string), t_timestamp (type: timestamp), t_date (type: date),
t_decimal (type: decimal(4,2))
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7, _col8, _col9
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type:
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type:
decimal(4,2))
+ null sort order: zzzzzzzzz
+ sort order: +++++++++
+ Map-reduce partition columns: _col0 (type: double),
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type:
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date),
_col8 (type: decimal(4,2))
Statistics: Num rows: 1 Data size: 372 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type:
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type:
decimal(4,2))
- null sort order: zzzzzzzzz
- sort order: +++++++++
- Map-reduce partition columns: _col0 (type: double),
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type:
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date),
_col8 (type: decimal(4,2))
- Statistics: Num rows: 1 Data size: 372 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col9 (type: float)
+ value expressions: _col9 (type: float)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
index 2701494205..e6d990caa4 100644
---
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
+++
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
@@ -62,18 +62,16 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_11]
- Group By Operator [GBY_10] (rows=10 width=92)
+ File Output Operator [FS_10]
+ Group By Operator [GBY_9] (rows=10 width=92)
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0
- Group By Operator [GBY_8] (rows=10 width=92)
+ Group By Operator [GBY_7] (rows=10 width=92)
Output:["_col0","_col1"],aggregations:["max(a)"],keys:b
- Select Operator [SEL_7] (rows=19 width=92)
- Output:["a","b"]
- TableScan [TS_0] (rows=19 width=92)
-
default@tbl_ice_mixed,tbl_ice_mixed,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+ TableScan [TS_0] (rows=19 width=92)
+
default@tbl_ice_mixed,tbl_ice_mixed,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
PREHOOK: query: select b, max(a) from tbl_ice_mixed group by b
PREHOOK: type: QUERY
@@ -167,20 +165,18 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=2 width=373)
+ File Output Operator [FS_11]
+ Select Operator [SEL_10] (rows=2 width=373)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
- Group By Operator [GBY_10] (rows=2 width=373)
+ Group By Operator [GBY_9] (rows=2 width=373)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0,
KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7,
KEY._col8
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8
- Group By Operator [GBY_8] (rows=2 width=373)
+ Group By Operator [GBY_7] (rows=2 width=373)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double,
t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
- Select Operator [SEL_7] (rows=2 width=373)
-
Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
- TableScan [TS_0] (rows=2 width=373)
-
default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+ TableScan [TS_0] (rows=2 width=373)
+
default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint,
t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string,
t_timestamp, t_date, t_decimal
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
index e6b1ceb69b..91c7bb2c54 100644
---
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
+++
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
@@ -46,18 +46,16 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_11]
- Group By Operator [GBY_10] (rows=5 width=92)
+ File Output Operator [FS_10]
+ Group By Operator [GBY_9] (rows=5 width=92)
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0
- Group By Operator [GBY_8] (rows=5 width=92)
+ Group By Operator [GBY_7] (rows=5 width=92)
Output:["_col0","_col1"],aggregations:["max(a)"],keys:b
- Select Operator [SEL_7] (rows=10 width=92)
- Output:["a","b"]
- TableScan [TS_0] (rows=10 width=92)
-
default@tbl_ice_orc,tbl_ice_orc,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+ TableScan [TS_0] (rows=10 width=92)
+
default@tbl_ice_orc,tbl_ice_orc,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
PREHOOK: query: select b, max(a) from tbl_ice_orc group by b
PREHOOK: type: QUERY
@@ -130,20 +128,18 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=1 width=372)
+ File Output Operator [FS_11]
+ Select Operator [SEL_10] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
- Group By Operator [GBY_10] (rows=1 width=372)
+ Group By Operator [GBY_9] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0,
KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7,
KEY._col8
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8
- Group By Operator [GBY_8] (rows=1 width=372)
+ Group By Operator [GBY_7] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double,
t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
- Select Operator [SEL_7] (rows=1 width=372)
-
Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
- TableScan [TS_0] (rows=1 width=372)
-
default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+ TableScan [TS_0] (rows=1 width=372)
+
default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint,
t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_orc_all_types
group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string,
t_timestamp, t_date, t_decimal
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
index 34faa886d7..43da19476b 100644
---
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
+++
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
@@ -46,18 +46,16 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_11]
- Group By Operator [GBY_10] (rows=5 width=92)
+ File Output Operator [FS_10]
+ Group By Operator [GBY_9] (rows=5 width=92)
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0
- Group By Operator [GBY_8] (rows=5 width=92)
+ Group By Operator [GBY_7] (rows=5 width=92)
Output:["_col0","_col1"],aggregations:["max(a)"],keys:b
- Select Operator [SEL_7] (rows=10 width=92)
- Output:["a","b"]
- TableScan [TS_0] (rows=10 width=92)
-
default@tbl_ice_parquet,tbl_ice_parquet,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+ TableScan [TS_0] (rows=10 width=92)
+
default@tbl_ice_parquet,tbl_ice_parquet,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
PREHOOK: query: select b, max(a) from tbl_ice_parquet group by b
PREHOOK: type: QUERY
@@ -130,20 +128,18 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=1 width=372)
+ File Output Operator [FS_11]
+ Select Operator [SEL_10] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
- Group By Operator [GBY_10] (rows=1 width=372)
+ Group By Operator [GBY_9] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0,
KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7,
KEY._col8
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8
- Group By Operator [GBY_8] (rows=1 width=372)
+ Group By Operator [GBY_7] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double,
t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
- Select Operator [SEL_7] (rows=1 width=372)
-
Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
- TableScan [TS_0] (rows=1 width=372)
-
default@tbl_ice_parquet_all_types,tbl_ice_parquet_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+ TableScan [TS_0] (rows=1 width=372)
+
default@tbl_ice_parquet_all_types,tbl_ice_parquet_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint,
t_binary, t_string, t_timestamp, t_date, t_decimal from
tbl_ice_parquet_all_types
group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string,
t_timestamp, t_date, t_decimal
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 4582878817..ca2516123c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -3364,7 +3364,7 @@ public class AcidUtils {
public static boolean isNonNativeAcidTable(Table table) {
return table != null && table.getStorageHandler() != null &&
- table.getStorageHandler().supportsAcidOperations() !=
HiveStorageHandler.AcidSupportType.NONE;
+ table.getStorageHandler().supportsAcidOperations(table) !=
HiveStorageHandler.AcidSupportType.NONE;
}
/**
@@ -3386,7 +3386,7 @@ public class AcidUtils {
public static boolean acidTableWithoutTransactions(Table table) {
return table != null && table.getStorageHandler() != null &&
- table.getStorageHandler().supportsAcidOperations() ==
HiveStorageHandler.AcidSupportType.WITHOUT_TRANSACTIONS;
+ table.getStorageHandler().supportsAcidOperations(table) ==
HiveStorageHandler.AcidSupportType.WITHOUT_TRANSACTIONS;
}
static class DirInfoValue {
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 65e458cc65..bdfdf3fde3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -291,7 +291,7 @@ public interface HiveStorageHandler extends Configurable {
*
* @return the table's ACID support type
*/
- default AcidSupportType supportsAcidOperations() {
+ default AcidSupportType
supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table) {
return AcidSupportType.NONE;
}
@@ -299,7 +299,8 @@ public interface HiveStorageHandler extends Configurable {
* Specifies which additional virtual columns should be added to the virtual
column registry during compilation
* for tables that support ACID operations.
*
- * Should only return a non-empty list if {@link
HiveStorageHandler#supportsAcidOperations()} ()} returns something
+ * Should only return a non-empty list if
+ * {@link
HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)}
returns something
* other NONE.
*
* @return the list of ACID virtual columns
@@ -318,7 +319,8 @@ public interface HiveStorageHandler extends Configurable {
*
* This method specifies which columns should be injected into the
<selectCols> part of the rewritten query.
*
- * Should only return a non-empty list if {@link
HiveStorageHandler#supportsAcidOperations()} returns something
+ * Should only return a non-empty list if
+ * {@link
HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)}
returns something
* other NONE.
*
* @param table the table which is being deleted/updated/merged into
@@ -336,7 +338,8 @@ public interface HiveStorageHandler extends Configurable {
*
* This method specifies which columns should be injected into the
<sortCols> part of the rewritten query.
*
- * Should only return a non-empty list if {@link
HiveStorageHandler#supportsAcidOperations()} returns something
+ * Should only return a non-empty list if
+ * {@link
HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)}
returns something
* other NONE.
*
* @param table the table which is being deleted/updated/merged into
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
index 3897a2912a..d8f0a3041c 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
@@ -89,8 +89,9 @@ public class HiveRelOptMaterializationValidator extends
HiveRelShuttleImpl {
if (tab.isTemporary()) {
fail(tab.getTableName() + " is a temporary table");
}
- if (tab.getTableType() == TableType.EXTERNAL_TABLE &&
!AcidUtils.isNonNativeAcidTable(tab)) {
- fail(tab.getFullyQualifiedName() + " is an external table");
+ if (tab.getTableType() == TableType.EXTERNAL_TABLE &&
+ !(tab.getStorageHandler() != null &&
tab.getStorageHandler().areSnapshotsSupported())) {
+ fail(tab.getFullyQualifiedName() + " is an external table and does not
support snapshots");
}
return hiveScan;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 3e12bf4efd..2458cbb096 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -14222,7 +14222,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
}
if (AcidUtils.isTransactionalTable(table)) {
++nativeAcidCount;
- } else if (AcidUtils.isNonNativeAcidTable(table) &&
table.getStorageHandler().areSnapshotsSupported()) {
+ } else if (table.isNonNative() &&
table.getStorageHandler().areSnapshotsSupported()) {
++supportsSnapshotCount;
} else {
throw new SemanticException("Automatic rewriting for materialized
view cannot "