This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new b18999fe0 IMPALA-12845: Crash with DESCRIBE on a complex type from an
Iceberg table
b18999fe0 is described below
commit b18999fe0975a0795c4b10243d7c5465ebef9ed3
Author: Daniel Becker <[email protected]>
AuthorDate: Fri Mar 1 17:57:51 2024 +0100
IMPALA-12845: Crash with DESCRIBE on a complex type from an Iceberg table
A DESCRIBE statement on a complex column contained in an Iceberg table
runs into a DCHECK and crashes Impala. An example with an array:
describe functional_parquet.iceberg_resolution_test_external.phone
Note that this also happens with Iceberg metadata tables, for example:
describe functional_parquet.iceberg_query_metadata.\
entries.readable_metrics;
With non-Iceberg tables there is no error.
The problem is that for Iceberg tables, the DESCRIBE statement returns
four columns: "name", "type", "comment" and "nullable" (only Iceberg and
Kudu tables have "nullable"). However, the DESCRIBE statement response
for complex types only contains the first three columns, i.e. no column
for "nullable". But as the table is an Iceberg table, the 'metadata_'
field of HS2ColumnarResultSet is still populated with four columns.
The DCHECK in HS2ColumnarResultSet::AddOneRow() expects the number of
columns to be the same in the DESCRIBE statement response and the
'metadata_' field.
This commit solves the problem by only adding the "nullable" column to
the 'metadata_' field if the target of the DESCRIBE statement is a
table, not a complex type.
Note that Kudu tables do not support complex types so this issue does
not arise there.
This change also addresses a minor issue: DescribeTableStmt::analyze()
did not check whether the statement was already analyzed and did not set
the 'analyzer_' field which would indicate that analysis had already
been done. This is now corrected.
Testing:
- added tests in describe-path.test for arrays, maps and structs from
regular Iceberg tables and metadata tables.
Change-Id: I5eda21a41167cc1fda183aa16fd6276a6a16f5d3
Reviewed-on: http://gerrit.cloudera.org:8080/21105
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../apache/impala/analysis/DescribeTableStmt.java | 46 +++++++++++------
.../java/org/apache/impala/service/Frontend.java | 25 +++++-----
.../queries/QueryTest/describe-path.test | 57 ++++++++++++++++++++++
3 files changed, 100 insertions(+), 28 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/DescribeTableStmt.java
b/fe/src/main/java/org/apache/impala/analysis/DescribeTableStmt.java
index 4830354df..37ac060bf 100644
--- a/fe/src/main/java/org/apache/impala/analysis/DescribeTableStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/DescribeTableStmt.java
@@ -85,6 +85,15 @@ public class DescribeTableStmt extends StatementBase {
public FeTable getTable() { return table_; }
public TDescribeOutputStyle getOutputStyle() { return outputStyle_; }
+
+ /**
+ * Returns true if this statement is to describe a table and not a complex
type.
+ */
+ public boolean targetsTable() {
+ Preconditions.checkState(isAnalyzed());
+ return path_.destTable() != null;
+ }
+
@Override
public void collectTableRefs(List<TableRef> tblRefs) {
tblRefs.add(new TableRef(rawPath_, null));
@@ -92,6 +101,9 @@ public class DescribeTableStmt extends StatementBase {
@Override
public void analyze(Analyzer analyzer) throws AnalysisException {
+ if (isAnalyzed()) return;
+ super.analyze(analyzer);
+
try {
path_ = analyzer.resolvePath(rawPath_, PathType.ANY);
} catch (AnalysisException ae) {
@@ -127,29 +139,31 @@ public class DescribeTableStmt extends StatementBase {
Privilege.ANY);
checkMinimalForIcebergMetadataTable();
- // Describing a table.
- if (path_.destTable() != null) return;
+ if (!targetsTable()) analyzeComplexType(analyzer);
+ }
+ private void analyzeComplexType(Analyzer analyzer) throws AnalysisException {
analyzer.registerPrivReq(builder ->
builder.onColumn(path_.getRootTable().getDb().getName(),
- path_.getRootTable().getName(),
- path_.getRawPath().get(0), path_.getRootTable().getOwnerUser())
- .any()
- .build());
-
- if (path_.destType().isComplexType()) {
- if (outputStyle_ == TDescribeOutputStyle.FORMATTED ||
- outputStyle_ == TDescribeOutputStyle.EXTENDED) {
- throw new AnalysisException("DESCRIBE FORMATTED|EXTENDED must refer to
a table");
- }
- // Describing a nested collection.
- Preconditions.checkState(outputStyle_ == TDescribeOutputStyle.MINIMAL);
- resultStruct_ = Path.getTypeAsStruct(path_.destType());
- } else {
+ path_.getRootTable().getName(),
+ path_.getRawPath().get(0), path_.getRootTable().getOwnerUser())
+ .any()
+ .build());
+
+ if (!path_.destType().isComplexType()) {
throw new AnalysisException("Cannot describe path '" +
Joiner.on('.').join(rawPath_) + "' targeting scalar type: " +
path_.destType().toSql());
}
+
+ if (outputStyle_ == TDescribeOutputStyle.FORMATTED ||
+ outputStyle_ == TDescribeOutputStyle.EXTENDED) {
+ throw new AnalysisException("DESCRIBE FORMATTED|EXTENDED must refer to a
table");
+ }
+
+ // Describing a complex type.
+ Preconditions.checkState(outputStyle_ == TDescribeOutputStyle.MINIMAL);
+ resultStruct_ = Path.getTypeAsStruct(path_.destType());
}
private void checkMinimalForIcebergMetadataTable() throws AnalysisException {
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java
b/fe/src/main/java/org/apache/impala/service/Frontend.java
index 25a4be16a..4d9e0c55c 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -657,19 +657,20 @@ public class Frontend {
new TColumn("name", Type.STRING.toThrift()),
new TColumn("type", Type.STRING.toThrift()),
new TColumn("comment", Type.STRING.toThrift()));
- if (descStmt.getTable() instanceof FeKuduTable
+ if (descStmt.targetsTable()
&& descStmt.getOutputStyle() == TDescribeOutputStyle.MINIMAL) {
- columns.add(new TColumn("primary_key", Type.STRING.toThrift()));
- columns.add(new TColumn("key_unique", Type.STRING.toThrift()));
- columns.add(new TColumn("nullable", Type.STRING.toThrift()));
- columns.add(new TColumn("default_value", Type.STRING.toThrift()));
- columns.add(new TColumn("encoding", Type.STRING.toThrift()));
- columns.add(new TColumn("compression", Type.STRING.toThrift()));
- columns.add(new TColumn("block_size", Type.STRING.toThrift()));
- } else if ((descStmt.getTable() instanceof FeIcebergTable
- || descStmt.getTable() instanceof IcebergMetadataTable)
- && descStmt.getOutputStyle() == TDescribeOutputStyle.MINIMAL) {
- columns.add(new TColumn("nullable", Type.STRING.toThrift()));
+ if (descStmt.getTable() instanceof FeKuduTable) {
+ columns.add(new TColumn("primary_key", Type.STRING.toThrift()));
+ columns.add(new TColumn("key_unique", Type.STRING.toThrift()));
+ columns.add(new TColumn("nullable", Type.STRING.toThrift()));
+ columns.add(new TColumn("default_value", Type.STRING.toThrift()));
+ columns.add(new TColumn("encoding", Type.STRING.toThrift()));
+ columns.add(new TColumn("compression", Type.STRING.toThrift()));
+ columns.add(new TColumn("block_size", Type.STRING.toThrift()));
+ } else if ((descStmt.getTable() instanceof FeIcebergTable
+ || descStmt.getTable() instanceof IcebergMetadataTable)) {
+ columns.add(new TColumn("nullable", Type.STRING.toThrift()));
+ }
}
metadata.setColumns(columns);
} else if (analysis.isAlterTableStmt()) {
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/describe-path.test
b/testdata/workloads/functional-query/queries/QueryTest/describe-path.test
index f56b0d380..b6994c458 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/describe-path.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/describe-path.test
@@ -187,3 +187,60 @@ describe functional_orc_def.complextypestbl
---- TYPES
string, string, string
====
+
+---- QUERY
+# Describe array from Iceberg table (regression test for IMPALA-12845).
+describe functional_parquet.iceberg_resolution_test_external.phone
+---- RESULTS
+'item','string',''
+'pos','bigint',''
+---- TYPES
+string,string,string
+====
+---- QUERY
+# Describe map from Iceberg table (regression test for IMPALA-12845).
+describe functional_parquet.iceberg_resolution_test_external.favorites
+---- RESULTS
+'key','string',''
+'value','string',''
+---- TYPES
+string,string,string
+====
+---- QUERY
+# Describe struct from Iceberg table (regression test for IMPALA-12845).
+describe functional_parquet.iceberg_resolution_test_external.current_address
+---- RESULTS
+'street_address','struct<\n street_number:int,\n street_name:string,\n
street_type:string\n>',''
+'country','string',''
+'postal_code','string',''
+---- TYPES
+string,string,string
+====
+
+---- QUERY
+# Describe array from Iceberg metadata table (regression test for
IMPALA-12845).
+describe
functional_parquet.iceberg_resolution_test_external.all_files.equality_ids
+---- RESULTS
+'item','int',''
+'pos','bigint',''
+---- TYPES
+string,string,string
+====
+---- QUERY
+# Describe map from Iceberg metadata table (regression test for IMPALA-12845).
+describe
functional_parquet.iceberg_resolution_test_external.all_files.column_sizes
+---- RESULTS
+'key','int',''
+'value','bigint',''
+---- TYPES
+string,string,string
+====
+---- QUERY
+# Describe struct from Iceberg metadata table (regression test for
IMPALA-12845).
+describe
functional_parquet.iceberg_resolution_test_external.all_files.`partition`
+---- RESULTS
+'event_time_hour','int',''
+'action','string',''
+---- TYPES
+string,string,string
+====