This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new a62344720 IMPALA-12899: Temporary workaround for BINARY in complex 
types
a62344720 is described below

commit a6234472066e6fab4815568e63061df5930e4c2c
Author: Daniel Becker <[email protected]>
AuthorDate: Thu Mar 14 18:29:02 2024 +0100

    IMPALA-12899: Temporary workaround for BINARY in complex types
    
    The BINARY type is currently not supported inside complex types and a
    cross-component decision is probably needed to support it (see
    IMPALA-11491). We would like to enable EXPAND_COMPLEX_TYPES for Iceberg
    metadata tables (IMPALA-12612), which requires that queries with BINARY
    inside complex types don't fail. Enabling EXPAND_COMPLEX_TYPES is a more
    prioritised issue than IMPALA-11491, so we have come up with a
    temporary solution.
    
    This change NULLs out BINARY values in complex types coming from Iceberg
    metadata tables and logs a warning.
    
    BINARYs in complex types from regular tables are not affected by this
    change.
    
    Testing:
     - Added test queries in iceberg-metadata-tables.test.
    
    Change-Id: I0d834126c7d702a25e957bb6071ecbf0fda2c203
    Reviewed-on: http://gerrit.cloudera.org:8080/21219
    Reviewed-by: Gabor Kaszab <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/iceberg-metadata/iceberg-row-reader.cc | 12 +++++---
 .../java/org/apache/impala/analysis/Analyzer.java  |  5 ++-
 .../main/java/org/apache/impala/analysis/Path.java | 13 ++++++++
 .../java/org/apache/impala/analysis/SlotRef.java   |  4 ++-
 .../queries/QueryTest/iceberg-metadata-tables.test | 36 ++++++++++++++++++++++
 5 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc 
b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
index be6e0df9f..ea7f20532 100644
--- a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
+++ b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
@@ -89,7 +89,8 @@ Status IcebergRowReader::WriteSlot(JNIEnv* env, const 
jobject* struct_like_row,
     return Status::OK();
   }
   void* slot = tuple->GetSlot(slot_desc->tuple_offset());
-  switch (slot_desc->type().type) {
+  const ColumnType& type = slot_desc->type();
+  switch (type.type) {
     case TYPE_BOOLEAN: { // java.lang.Boolean
       RETURN_IF_ERROR(WriteBooleanSlot(env, accessed_value, slot));
       break;
@@ -103,7 +104,12 @@ Status IcebergRowReader::WriteSlot(JNIEnv* env, const 
jobject* struct_like_row,
       RETURN_IF_ERROR(WriteTimeStampSlot(env, accessed_value, slot));
       break;
     } case TYPE_STRING: { // java.lang.String
-      RETURN_IF_ERROR(WriteStringSlot(env, accessed_value, slot, 
tuple_data_pool));
+      if (type.IsBinaryType()) {
+        // TODO IMPALA-12651,IMPALA-11491: Display BINARY correctly instead of 
NULLing it.
+        tuple->SetNull(slot_desc->null_indicator_offset());
+      } else {
+        RETURN_IF_ERROR(WriteStringSlot(env, accessed_value, slot, 
tuple_data_pool));
+      }
       break;
     } case TYPE_STRUCT: { // Struct type is not used by Impala to access 
values.
       DCHECK(struct_like_row != nullptr);
@@ -127,8 +133,6 @@ Status IcebergRowReader::WriteSlot(JNIEnv* env, const 
jobject* struct_like_row,
   return Status::OK();
 }
 
-
-
 Status IcebergRowReader::WriteBooleanSlot(JNIEnv* env, const jobject 
&accessed_value,
     void* slot) {
   DCHECK(accessed_value != nullptr);
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java 
b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index cd19f4f6f..08689d2c7 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1801,7 +1801,10 @@ public class Analyzer {
     boolean isResolved = resolvedPath.resolve();
     Preconditions.checkState(isResolved);
 
-    if (resolvedPath.destType().isBinary()) {
+    if (resolvedPath.destType().isBinary() &&
+        !collTblRef.getResolvedPath().comesFromIcebergMetadataTable()) {
+      // We allow BINARY fields in collections from Iceberg metadata tables 
but NULL them
+      // out.
       throw new AnalysisException(
           "Binary type inside collection types is not supported 
(IMPALA-11491).");
     }
diff --git a/fe/src/main/java/org/apache/impala/analysis/Path.java 
b/fe/src/main/java/org/apache/impala/analysis/Path.java
index f26c63ab0..5083527c6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Path.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Path.java
@@ -456,6 +456,19 @@ public class Path {
     return result;
   }
 
+  /**
+   * Returns whether the given path belongs to a (possibly nested) field from 
an Iceberg
+   * metadata table.
+   */
+  public boolean comesFromIcebergMetadataTable() {
+    Preconditions.checkState(rootTable_ != null || rootDesc_ != null);
+    if (rootDesc_ != null) {
+      return rootDesc_.getTable() instanceof IcebergMetadataTable;
+    } else {
+      return rootTable_ instanceof IcebergMetadataTable;
+    }
+  }
+
   /**
    * Returns the absolute explicit path starting from the fully-qualified 
table name.
    * The goal is produce a canonical non-ambiguous path that can be used as an
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java 
b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
index 33aef0458..905b080ba 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
@@ -214,7 +214,9 @@ public class SlotRef extends Expr {
         throw new AnalysisException("Unsupported type '"
             + fieldType.toSql() + "' in '" + toSql() + "'.");
       }
-      if (fieldType.isBinary()) {
+      if (fieldType.isBinary() && 
!desc_.getPath().comesFromIcebergMetadataTable()) {
+        // We allow BINARY fields in collections from Iceberg metadata tables 
but NULL
+        // them out.
         throw new AnalysisException("Struct containing a BINARY type is not " +
             "allowed in the select list (IMPALA-11491).");
       }
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
index 291fdc8b1..d83d31cd2 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
@@ -792,6 +792,42 @@ select column_sizes, value_counts, split_offsets, 
equality_ids from functional_p
 STRING,STRING,STRING,STRING
 ====
 
+####
+# Query BINARY elements of complex types;
+####
+---- QUERY
+select lower_bounds, upper_bounds from 
functional_parquet.iceberg_query_metadata.all_files;
+---- RESULTS
+'{1:null}','{1:null}'
+'{1:null}','{1:null}'
+'{2147483546:null,2147483545:null}','{2147483546:null,2147483545:null}'
+'{1:null}','{1:null}'
+---- TYPES
+STRING,STRING
+====
+---- QUERY
+select data_file from functional_parquet.iceberg_query_metadata.entries;
+---- RESULTS
+row_regex:'{"content":0,"file_path":".*/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_query_metadata/data/.*_data.0.parq","file_format":"PARQUET","spec_id":0,"record_count":1,"file_size_in_bytes":351,"column_sizes":{1:47},"value_counts":{1:1},"null_value_counts":{1:0},"nan_value_counts":null,"lower_bounds":{1:null},"upper_bounds":{1:null},"key_metadata":null,"split_offsets":null,"equality_ids":null,"sort_order_id":0}'
+row_regex:'{"content":0,"file_path":".*/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_query_metadata/data/.*_data.0.parq","file_format":"PARQUET","spec_id":0,"record_count":1,"file_size_in_bytes":351,"column_sizes":{1:47},"value_counts":{1:1},"null_value_counts":{1:0},"nan_value_counts":null,"lower_bounds":{1:null},"upper_bounds":{1:null},"key_metadata":null,"split_offsets":null,"equality_ids":null,"sort_order_id":0}'
+row_regex:'{"content":0,"file_path":".*/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_query_metadata/data/.*_data.0.parq","file_format":"PARQUET","spec_id":0,"record_count":1,"file_size_in_bytes":351,"column_sizes":{1:47},"value_counts":{1:1},"null_value_counts":{1:0},"nan_value_counts":null,"lower_bounds":{1:null},"upper_bounds":{1:null},"key_metadata":null,"split_offsets":null,"equality_ids":null,"sort_order_id":0}'
+row_regex:'{"content":1,"file_path":".*/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_query_metadata/data/delete-.*_data.0.parq","file_format":"PARQUET","spec_id":0,"record_count":1,"file_size_in_bytes":[1-9][0-9]*,"column_sizes":{2147483546:[1-9][0-9]*,2147483545:[1-9][0-9]*},"value_counts":{2147483546:1,2147483545:1},"null_value_counts":{2147483546:0,2147483545:0},"nan_value_counts":null,"lower_bounds":{2147483546:null,2147483545:null},"upper_bounds":{2147483546:null,214748354
 [...]
+---- TYPES
+STRING
+====
+---- QUERY
+set EXPAND_COMPLEX_TYPES=1;
+select * from functional_parquet.iceberg_v2_delete_both_eq_and_pos.all_files;
+---- RESULTS
+1,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/delete-074a9e19e61b766e-652a169e00000001_800513971_data.0.parq','PARQUET',0,1,1606,'{2147483546:215,2147483545:51}','{2147483546:1,2147483545:1}','{2147483546:0,2147483545:0}','NULL','{2147483546:null,2147483545:null}','{2147483546:null,2147483545:null}','NULL','NULL','NULL',NULL,'{"d":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bou
 [...]
+2,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-38a471ff-46f4-4350-85cc-2e7ba946b34c-00002.parquet','PARQUET',0,2,697,'{1:40,3:66}','{1:2,3:2}','{1:0,3:0}','{}','{1:null,3:null}','{1:null,3:null}','NULL','[4]','[1,3]',0,'{"d":{"column_size":66,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"
 [...]
+0,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-72709aba-fb15-4bd6-9758-5f39eb9bdcb7-00001.parquet','PARQUET',0,2,885,'{1:40,2:62,3:40}','{1:2,2:2,3:2}','{1:0,2:0,3:0}','{}','{1:null,2:null,3:null}','{1:null,2:null,3:null}','NULL','[4]','NULL',0,'{"d":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_cou
 [...]
+0,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-38a471ff-46f4-4350-85cc-2e7ba946b34c-00001.parquet','PARQUET',0,2,898,'{1:40,2:54,3:66}','{1:2,2:2,3:2}','{1:0,2:0,3:0}','{}','{1:null,2:null,3:null}','{1:null,2:null,3:null}','NULL','[4]','NULL',0,'{"d":{"column_size":66,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_cou
 [...]
+2,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-72709aba-fb15-4bd6-9758-5f39eb9bdcb7-00002.parquet','PARQUET',0,2,657,'{1:40,3:40}','{1:2,3:2}','{1:0,3:0}','{}','{1:null,3:null}','{1:null,3:null}','NULL','[4]','[1,3]',0,'{"d":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":2,"
 [...]
+---- TYPES
+INT,STRING,STRING,INT,BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,BINARY,STRING,STRING,INT,STRING
+====
+
 ####
 # Describe all the metadata tables once
 ####

Reply via email to