This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f77bcc95c8d HIVE-26786: Iceberg: Read queries with copy-on-write
failing. (#3810). (Ayush Saxena, reviewed by Adam Szita)
f77bcc95c8d is described below
commit f77bcc95c8dbff39769cb85034517f00ac72cd0a
Author: Ayush Saxena <[email protected]>
AuthorDate: Wed Nov 30 00:51:53 2022 +0530
HIVE-26786: Iceberg: Read queries with copy-on-write failing. (#3810).
(Ayush Saxena, reviewed by Adam Szita)
---
.../apache/iceberg/mr/hive/HiveIcebergStorageHandler.java | 7 +++++--
.../java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java | 4 ++++
.../test/queries/positive/ctas_iceberg_partitioned_orc.q | 1 -
.../results/positive/ctas_iceberg_partitioned_orc.q.out | 1 +
ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java | 9 +++++----
.../apache/hadoop/hive/ql/metadata/HiveStorageHandler.java | 9 +++++----
.../org/apache/hadoop/hive/ql/parse/CalcitePlanner.java | 3 +--
.../apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java | 2 +-
.../hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java | 2 +-
.../org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 14 ++++++++------
.../hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java | 4 +---
11 files changed, 32 insertions(+), 24 deletions(-)
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index f96b5e1bfae..33be9da085f 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -595,9 +595,12 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
}
@Override
- public AcidSupportType
supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table) {
+ public AcidSupportType
supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table,
+ boolean isWriteOperation) {
if (table.getParameters() != null &&
"2".equals(table.getParameters().get(TableProperties.FORMAT_VERSION))) {
- checkDMLOperationMode(table);
+ if (isWriteOperation) {
+ checkDMLOperationMode(table);
+ }
return AcidSupportType.WITHOUT_TRANSACTIONS;
}
diff --git
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
index fe76bf43caf..054b5f0e3e0 100644
---
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
+++
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
@@ -634,6 +634,10 @@ public class TestHiveIcebergV2 extends
HiveIcebergStorageHandlerWithEngineBase {
Assert.assertTrue(e.getMessage().contains("Hive doesn't support
copy-on-write mode"));
}
+ // Try read queries, they shouldn't fail
+ shell.executeStatement("select * from customers where
first_name='Joanna'");
+ shell.executeStatement("select * from customers limit 1");
+ shell.executeStatement("select count(*) from customers");
}
private static <T> PositionDelete<T> positionDelete(CharSequence path, long
pos, T row) {
diff --git
a/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_partitioned_orc.q
b/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_partitioned_orc.q
index 5f558417092..3897c0e4ca0 100644
---
a/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_partitioned_orc.q
+++
b/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_partitioned_orc.q
@@ -1,4 +1,3 @@
---! qt:disabled:disabled the ctas doesn't set table as v2 in HMS & sets
copy-on-write which is unsupported at Hive
set
hive.query.lifetime.hooks=org.apache.iceberg.mr.hive.HiveIcebergQueryLifeTimeHook;
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
set hive.explain.user=false;
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out
b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out
index 863d41cc5ae..99004085770 100644
---
a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out
+++
b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out
@@ -273,6 +273,7 @@ Table Parameters:
EXTERNAL TRUE
bucketing_version -1
engine.hive.enabled true
+ format-version 2
iceberg.orc.files.only true
metadata_location hdfs://### HDFS PATH ###
numFiles 2
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 74a28c78d86..b4c61c839be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -3366,9 +3366,9 @@ public class AcidUtils {
}
}
- public static boolean isNonNativeAcidTable(Table table) {
+ public static boolean isNonNativeAcidTable(Table table, boolean
isWriteOperation) {
return table != null && table.getStorageHandler() != null &&
- table.getStorageHandler().supportsAcidOperations(table) !=
HiveStorageHandler.AcidSupportType.NONE;
+ table.getStorageHandler().supportsAcidOperations(table,
isWriteOperation) != HiveStorageHandler.AcidSupportType.NONE;
}
/**
@@ -3381,7 +3381,7 @@ public class AcidUtils {
if (isTransactionalTable(table)) {
return Lists.newArrayList(VirtualColumn.ROWID);
} else {
- if (isNonNativeAcidTable(table)) {
+ if (isNonNativeAcidTable(table, false)) {
return table.getStorageHandler().acidVirtualColumns();
}
}
@@ -3390,7 +3390,8 @@ public class AcidUtils {
public static boolean acidTableWithoutTransactions(Table table) {
return table != null && table.getStorageHandler() != null &&
- table.getStorageHandler().supportsAcidOperations(table) ==
HiveStorageHandler.AcidSupportType.WITHOUT_TRANSACTIONS;
+ table.getStorageHandler().supportsAcidOperations(table, true) ==
+ HiveStorageHandler.AcidSupportType.WITHOUT_TRANSACTIONS;
}
static class DirInfoValue {
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index b70ab657381..37d3497e694 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -293,7 +293,8 @@ public interface HiveStorageHandler extends Configurable {
*
* @return the table's ACID support type
*/
- default AcidSupportType
supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table) {
+ default AcidSupportType
supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table,
+ boolean isWriteOperation) {
return AcidSupportType.NONE;
}
@@ -302,7 +303,7 @@ public interface HiveStorageHandler extends Configurable {
* for tables that support ACID operations.
*
* Should only return a non-empty list if
- * {@link
HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)}
()} returns something
+ * {@link
HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table,
boolean)} ()} returns something
* other NONE.
*
* @return the list of ACID virtual columns
@@ -322,7 +323,7 @@ public interface HiveStorageHandler extends Configurable {
* This method specifies which columns should be injected into the
<selectCols> part of the rewritten query.
*
* Should only return a non-empty list if
- * {@link
HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)}
returns something
+ * {@link
HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table,
boolean)} returns something
* other NONE.
*
* @param table the table which is being deleted/updated/merged into
@@ -341,7 +342,7 @@ public interface HiveStorageHandler extends Configurable {
* This method specifies which columns should be injected into the
<sortCols> part of the rewritten query.
*
* Should only return a non-empty list if
- * {@link
HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)}
returns something
+ * {@link
HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table,
boolean)} returns something
* other NONE.
*
* @param table the table which is being deleted/updated/merged into
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 5a6d256cb20..7bb42e89e9f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -90,7 +90,6 @@ import
org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
-import org.apache.calcite.rel.rules.CoreRules;
import org.apache.calcite.rel.rules.FilterMergeRule;
import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
import org.apache.calcite.rel.rules.LoptOptimizeJoinRule;
@@ -2951,7 +2950,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
List<VirtualColumn> virtualCols = new ArrayList<>();
if (tableType == TableType.NATIVE) {
virtualCols = VirtualColumn.getRegistry(conf);
- if (AcidUtils.isNonNativeAcidTable(tabMetaData)) {
+ if (AcidUtils.isNonNativeAcidTable(tabMetaData, false)) {
virtualCols.addAll(tabMetaData.getStorageHandler().acidVirtualColumns());
}
for (VirtualColumn vc : virtualCols) {
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java
index 577890748ad..e2195512ee3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java
@@ -62,7 +62,7 @@ public class MergeSemanticAnalyzer extends
RewriteSemanticAnalyzer {
@Override
public void analyze(ASTNode tree, Table targetTable, ASTNode tableNameNode)
throws SemanticException {
- boolean nonNativeAcid = AcidUtils.isNonNativeAcidTable(targetTable);
+ boolean nonNativeAcid = AcidUtils.isNonNativeAcidTable(targetTable, true);
if (nonNativeAcid) {
throw new
SemanticException(ErrorMsg.NON_NATIVE_ACID_UPDATE.getErrorCodedMsg());
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
index 874756860d6..cdce0eec80f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
@@ -609,7 +609,7 @@ public abstract class RewriteSemanticAnalyzer extends
CalcitePlanner {
public static final String SUB_QUERY_ALIAS = "s";
protected ColumnAppender getColumnAppender(String subQueryAlias) {
- boolean nonNativeAcid = AcidUtils.isNonNativeAcidTable(targetTable);
+ boolean nonNativeAcid = AcidUtils.isNonNativeAcidTable(targetTable, true);
return nonNativeAcid ? new NonNativeAcidColumnAppender(targetTable, conf,
subQueryAlias) :
new NativeAcidColumnAppender(targetTable, conf, subQueryAlias);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index a3a4f3fc28e..a6d5719bec3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2389,8 +2389,10 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
"Inconsistent data structure detected: we are writing to " +
ts.tableHandle + " in " +
name + " but it's not in isInsertIntoTable() or
getInsertOverwriteTables()";
// Disallow update and delete on non-acid tables
- boolean isFullAcid = AcidUtils.isFullAcidTable(ts.tableHandle) ||
AcidUtils.isNonNativeAcidTable(ts.tableHandle);
- if ((updating(name) || deleting(name)) && !isFullAcid) {
+ final boolean isWriteOperation = updating(name) || deleting(name);
+ boolean isFullAcid = AcidUtils.isFullAcidTable(ts.tableHandle) ||
+ AcidUtils.isNonNativeAcidTable(ts.tableHandle, isWriteOperation);
+ if (isWriteOperation && !isFullAcid) {
if (!AcidUtils.isInsertOnlyTable(ts.tableHandle)) {
// Whether we are using an acid compliant transaction manager has
already been caught in
// UpdateDeleteSemanticAnalyzer, so if we are updating or deleting
and getting nonAcid
@@ -6934,7 +6936,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
}
} else {
// Non-native acid tables should handle their own bucketing for
updates/deletes
- if ((updating(dest) || deleting(dest)) &&
!AcidUtils.isNonNativeAcidTable(dest_tab)) {
+ if ((updating(dest) || deleting(dest)) &&
!AcidUtils.isNonNativeAcidTable(dest_tab, true)) {
partnCols = getPartitionColsFromBucketColsForUpdateDelete(input, true);
enforceBucketing = true;
}
@@ -7903,7 +7905,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
List<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
if (updating(dest) || deleting(dest)) {
- if (AcidUtils.isNonNativeAcidTable(destinationTable)) {
+ if (AcidUtils.isNonNativeAcidTable(destinationTable, true)) {
destinationTable.getStorageHandler().acidVirtualColumns().stream()
.map(col -> new ColumnInfo(col.getName(), col.getTypeInfo(), "",
true))
.forEach(vecCol::add);
@@ -8631,7 +8633,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
// For Non-Native ACID tables we should convert the new values as well
rowFieldsOffset = expressions.size();
- if (updating(dest) && AcidUtils.isNonNativeAcidTable(table)) {
+ if (updating(dest) && AcidUtils.isNonNativeAcidTable(table, true)) {
for (int i = 0; i < columnNumber; i++) {
ExprNodeDesc column = handleConversion(tableFields.get(i),
rowFields.get(rowFieldsOffset + i), converted, dest, i);
expressions.add(column);
@@ -11633,7 +11635,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
if (!tab.isNonNative()) {
vcList.addAll(VirtualColumn.getRegistry(conf));
}
- if (tab.isNonNative() && AcidUtils.isNonNativeAcidTable(tab)) {
+ if (tab.isNonNative() && AcidUtils.isNonNativeAcidTable(tab, false)) {
vcList.addAll(tab.getStorageHandler().acidVirtualColumns());
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 20fd2fe2036..a1304a87c2f 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -23,8 +23,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.stream.Collectors;
-
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
@@ -64,7 +62,7 @@ public class UpdateDeleteSemanticAnalyzer extends
RewriteSemanticAnalyzer {
reparseAndSuperAnalyze(tree, table, tabNameNode);
break;
case HiveParser.TOK_UPDATE_TABLE:
- boolean nonNativeAcid = AcidUtils.isNonNativeAcidTable(table);
+ boolean nonNativeAcid = AcidUtils.isNonNativeAcidTable(table, true);
if (nonNativeAcid) {
throw new
SemanticException(ErrorMsg.NON_NATIVE_ACID_UPDATE.getErrorCodedMsg());
}