This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 82d8e382a IMPALA-13296: Check column compatibility earlier for table
migration
82d8e382a is described below
commit 82d8e382a3fa21e2930c0dd5e8198db647e36f17
Author: Gabor Kaszab <[email protected]>
AuthorDate: Thu Aug 15 13:37:21 2024 +0200
IMPALA-13296: Check column compatibility earlier for table migration
When migrating a Hive table to Iceberg there can be columns in the
source table that are not supported by Iceberg. For these tables the
table migration process gives an error. However, this is a multi-step
process and the check for column compatibility is performed after the
source table has already been renamed to a temporary name.
This patch brings the column type check to the analysis phase to avoid
renaming the table when there are incompatible columns.
Testing:
- Re-ran the existing checks
- Adjusted the error message for this error case to reflect that we
return an AnalysisException.
Change-Id: I381e7359a55ebd84f9a1dcec3b665bdbea62a035
Reviewed-on: http://gerrit.cloudera.org:8080/21675
Reviewed-by: Peter Rozsa <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../impala/analysis/ConvertTableToIcebergStmt.java | 13 ++++++++++++-
.../org/apache/impala/analysis/AnalyzeStmtsTest.java | 16 +++++++++-------
.../iceberg-migrate-from-external-hdfs-tables.test | 2 +-
3 files changed, 22 insertions(+), 9 deletions(-)
diff --git
a/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java
b/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java
index b8b0ee95f..b0b5069ba 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java
@@ -34,13 +34,13 @@ import org.apache.impala.authorization.Privilege;
import org.apache.impala.catalog.FeFsTable;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.IcebergTable;
-import org.apache.impala.catalog.PrunablePartition;
import org.apache.impala.catalog.Table;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.thrift.THdfsFileFormat;
import org.apache.impala.thrift.TIcebergCatalog;
import org.apache.impala.thrift.TConvertTableRequest;
import org.apache.impala.util.AcidUtils;
+import org.apache.impala.util.IcebergSchemaConverter;
import org.apache.impala.util.IcebergUtil;
import org.apache.impala.util.MigrateTableUtil;
@@ -107,6 +107,8 @@ public class ConvertTableToIcebergStmt extends
StatementBase {
sd.getInputFormat());
}
+ checkColumnTypeCompatibility(table);
+
if (properties_.size() > 1 ||
properties_.keySet().stream().anyMatch(
key -> !key.equalsIgnoreCase(IcebergTable.ICEBERG_CATALOG)) ) {
@@ -123,6 +125,15 @@ public class ConvertTableToIcebergStmt extends
StatementBase {
createSubQueryStrings((FeFsTable) table);
}
+ private void checkColumnTypeCompatibility(FeTable table) throws
AnalysisException {
+ try {
+ IcebergSchemaConverter.convertToIcebergSchema(table.getMetaStoreTable());
+ } catch (IllegalArgumentException e) {
+ throw new AnalysisException("Incompatible column type in source table. "
+
+ e.getMessage());
+ }
+ }
+
private void createSubQueryStrings(FeFsTable table) {
setHdfsTablePropertiesQuery_ = SetTblProps.builder()
.table(table.getFullName())
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index 6037d45d8..2f9405a67 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -5196,20 +5196,22 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
@Test
public void TestConvertTable() {
- AnalyzesOk("alter table functional_parquet.alltypes convert to iceberg");
- AnalyzesOk("alter table functional_parquet.alltypes convert to iceberg"
+ AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg");
+ AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg"
+ " tblproperties('iceberg.catalog'='hadoop.tables')");
- AnalyzesOk("alter table functional_parquet.alltypes convert to iceberg"
+ AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg"
+ " tblproperties('iceberg.catalog'='hive.catalog')");
- AnalysisError("alter table functional_parquet.alltypes convert to iceberg"
+ AnalysisError("alter table functional_parquet.alltypes convert to iceberg",
+ "Incompatible column type in source table. Unsupported Hive type:
BYTE");
+ AnalysisError("alter table functional_parquet.tinytable convert to iceberg"
+ " tblproperties('iceberg.catalog'='hadoop.catalog')",
"The Hadoop Catalog is not supported because the location may change");
- AnalysisError("alter table functional_kudu.alltypes convert to iceberg",
+ AnalysisError("alter table functional_kudu.tinytable convert to iceberg",
"CONVERT TO ICEBERG is not supported for KuduTable");
- AnalysisError("alter table functional.alltypes convert to iceberg",
+ AnalysisError("alter table functional.tinytable convert to iceberg",
"CONVERT TO ICEBERG is not supported for " +
"org.apache.hadoop.mapred.TextInputFormat");
- AnalysisError("alter table functional_parquet.alltypes convert to iceberg"
+ AnalysisError("alter table functional_parquet.tinytable convert to iceberg"
+ " tblproperties('metadata.generator.threads'='a1')",
"CONVERT TO ICEBERG only accepts 'iceberg.catalog' as TBLPROPERTY.");
}
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test
index 9d3fde193..087ebc35c 100644
---
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test
+++
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test
@@ -204,7 +204,7 @@ AnalysisException: CONVERT TO ICEBERG is not supported for
transactional tables
create table hdfs_table2 (col tinyint) stored as parquet;
alter table hdfs_table2 convert to iceberg;
---- CATCH
-Unsupported Hive type: BYTE, use integer instead
+AnalysisException: Incompatible column type in source table. Unsupported Hive
type: BYTE, use integer instead
====
---- QUERY
# Test table migration for decimal partitioned table.