This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 825e059dc5c1bf325a61b588f1d9615d569ee04b
Author: Daniel Becker <[email protected]>
AuthorDate: Thu Jan 11 15:18:47 2024 +0100

    IMPALA-12695: Crash with UNION with complex types
    
    If we unnest an array coming from a UNION ALL, we read invalid memory
    and in ASAN builds we crash.
    
    Example:
      with v as (select arr1 from complextypes_arrays
        union all select arr1 from complextypes_arrays)
      select am.item from v, v.arr1 am;
    
    The problem seems to be that in the item tuple of the collections, the
    item slots are present twice. This is because both the inline view
    analyzer and the main analyzer add slots with the same path to the
    tuple. This is possible because
      - the target tuple is determined based on the path via
        Path.getRootDesc(), so it will be the same both in the inline view
        and in the main scope
        AND
      - the inline view analyzer and the main one do not share
        'slotPathMap_', so the analyzer cannot recognise that a slot for the
        path has already been added.
    
    This commit solves the problem by checking the target tuple whether a
    slot with the same path already exists in it, and if it does, we reuse
    that slot. Note, however, that when Analyzer.registerSlotRef() is called
    with 'duplicateIfCollections=true', a separate slot is added for
    collections which should not be reused. This commit adds a set,
    'duplicateCollectionSlots', in Analyzer.GlobalState to keep track of
    such collection slots, and these slots are never reused.
    
    Note that there is another bug, IMPALA-12753, that a predicate on the
    collection item in the above query is only enforced on the first child
    of the union. Therefore this commit disallows placing a predicate on a
    collection item when the unnested collection comes from a union.
    
    Testing:
     - added test queries in nested-array-in-select-list.test,
       nested-map-in-select-list.test, zipping-unnest-in-from-clause.test
       and zipping-unnest-in-select-list.test
    
    Change-Id: I340adc50e6d7cda6f59dacd7a46b6adc31635d46
    Reviewed-on: http://gerrit.cloudera.org:8080/20953
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../java/org/apache/impala/analysis/Analyzer.java  |  38 +++++-
 .../org/apache/impala/analysis/InlineViewRef.java  |   1 +
 .../java/org/apache/impala/planner/UnnestNode.java |  60 +++++++++
 .../QueryTest/nested-array-in-select-list.test     | 137 +++++++++++++++++++++
 .../QueryTest/nested-map-in-select-list.test       |  37 +++++-
 .../QueryTest/zipping-unnest-in-from-clause.test   |  52 ++++++++
 .../QueryTest/zipping-unnest-in-select-list.test   |  52 ++++++++
 7 files changed, 373 insertions(+), 4 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java 
b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 600cc9a8f..8c2194728 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -604,6 +604,10 @@ public class Analyzer {
     // Shows how many zipping unnests were in the query;
     public int numZippingUnnests = 0;
 
+    // Ids of (collection-typed) SlotDescriptors that were registered with
+    // 'duplicateIfCollections=true' in 'Analyzer.registerSlotRef()'.
+    public Set<SlotId> duplicateCollectionSlots = new HashSet<>();
+
     public GlobalState(StmtTableCache stmtTableCache, TQueryCtx queryCtx,
         AuthorizationFactory authzFactory, AuthorizationContext authzCtx) {
       this.stmtTableCache = stmtTableCache;
@@ -1600,7 +1604,9 @@ public class Analyzer {
       if (slotPath.destType().isCollectionType() && duplicateIfCollections) {
         // Register a new slot descriptor for collection types. The BE 
currently
         // relies on this behavior for setting unnested collection slots to 
NULL.
-        return createAndRegisterRawSlotDesc(slotPath, false);
+        SlotDescriptor res = createAndRegisterRawSlotDesc(slotPath, false);
+        globalState_.duplicateCollectionSlots.add(res.getId());
+        return res;
       }
       // SlotRefs with scalar or struct types are registered against the slot's
       // fully-qualified lowercase path.
@@ -1610,10 +1616,40 @@ public class Analyzer {
       SlotDescriptor existingSlotDesc = slotPathMap_.get(key);
       if (existingSlotDesc != null) return existingSlotDesc;
 
+      SlotDescriptor existingInTuple = findPathInCurrentTuple(slotPath);
+      if (existingInTuple != null) return existingInTuple;
+
       return createAndRegisterSlotDesc(slotPath);
     }
   }
 
+  // It is possible that another Analyzer, for example a child Analyzer in an 
inline view,
+  // has already inserted a SlotDescriptor with the current path in the 
current tuple. But
+  // because it was a different Analyzer, the path is not present in this 
Analyzer's
+  // 'slotPathMap_'. We should reuse the existing SlotDescriptor unless it was 
explicitly
+  // added with 'duplicateIfCollections=true' in 'registerSlotRef()'.
+  //
+  // Returns the existing SlotDescriptor with the same path in the current 
tuple if it
+  // exists or 'null' if it doesn't.
+  private SlotDescriptor findPathInCurrentTuple(Path slotPath) {
+    Preconditions.checkNotNull(slotPath);
+
+    TupleDescriptor currentTupleDesc = tupleStack_.peek();
+    Preconditions.checkNotNull(currentTupleDesc);
+
+    final List<String> slotPathFQR = slotPath.getFullyQualifiedRawPath();
+    Preconditions.checkNotNull(slotPathFQR);
+
+    for (SlotDescriptor slotDesc : currentTupleDesc.getSlots()) {
+      final List<String> tupleSlotFQR = 
slotDesc.getPath().getFullyQualifiedRawPath();
+      if (slotPathFQR.equals(tupleSlotFQR) &&
+            !globalState_.duplicateCollectionSlots.contains(slotDesc.getId())) 
{
+        return slotDesc;
+      }
+    }
+    return null;
+  }
+
   private SlotDescriptor createAndRegisterSlotDesc(Path slotPath)
       throws AnalysisException {
     if (slotPath.destType().isCollectionType()) {
diff --git a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java 
b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
index 2a57e6ec8..4ad3073dc 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
@@ -372,6 +372,7 @@ public class InlineViewRef extends TableRef {
         // We don't recurse deeper and only add the immediate item child to the
         // substitution map. This is enough both for collections in select 
list and in
         // from clause.
+        // TODO: Revisit for IMPALA-12160.
         putExprsIntoSmaps(analyzer, itemSlotDesc, srcItemSlotRef, 
baseTableItemSlotRef,
             false);
       }
diff --git a/fe/src/main/java/org/apache/impala/planner/UnnestNode.java 
b/fe/src/main/java/org/apache/impala/planner/UnnestNode.java
index 919d849e3..7bebe83ab 100644
--- a/fe/src/main/java/org/apache/impala/planner/UnnestNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/UnnestNode.java
@@ -17,13 +17,18 @@
 
 package org.apache.impala.planner;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.CollectionTableRef;
 import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.SlotDescriptor;
 import org.apache.impala.analysis.SlotRef;
 import org.apache.impala.analysis.ToSqlUtils;
+import org.apache.impala.analysis.TupleDescriptor;
+import org.apache.impala.analysis.TupleId;
+import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.thrift.TExplainLevel;
 import org.apache.impala.thrift.TPlanNode;
@@ -79,6 +84,61 @@ public class UnnestNode extends PlanNode {
     // Unnest is like a scan and must materialize the slots of its conjuncts.
     analyzer.materializeSlots(conjuncts_);
     computeMemLayout(analyzer);
+
+    checkUnnestFromUnionWithPredicate(analyzer);
+  }
+
+  // Filtering an unnested collection that comes from a UNION [ALL] is not 
supported, see
+  // IMPALA-12753.
+  private void checkUnnestFromUnionWithPredicate(Analyzer analyzer)
+      throws AnalysisException {
+    PlanNode subplanInputNode = containingSubplanNode_.getChild(0);
+    if (!(subplanInputNode instanceof UnionNode)) return;
+
+    UnionNode union = (UnionNode) subplanInputNode;
+
+    // Tuple descriptors of the UNION and their descendants (for complex 
types).
+    List<TupleDescriptor> unionDescs = new ArrayList<>();
+    for (TupleId tid : union.getTupleIds()) {
+      TupleDescriptor tuple = analyzer.getDescTbl().getTupleDesc(tid);
+      getCollTupleDescs(tuple, unionDescs);
+    }
+
+    for (CollectionTableRef collTblRef : tblRefs_) {
+      final TupleDescriptor collItemTupleDesc = collTblRef.getDesc();
+
+      if (!unionDescs.contains(collItemTupleDesc)) continue;
+
+      List<Expr> predicates = analyzer.getConjuncts();
+      for (Expr pred : predicates) {
+        if (!pred.isAuxExpr()) {
+          List<Expr> matching = new ArrayList();
+          pred.collect(expr -> (expr instanceof SlotRef) &&
+              ((SlotRef) expr).getDesc().getParent().equals(collItemTupleDesc),
+              matching);
+          if (!matching.isEmpty()) {
+            throw new AnalysisException("Filtering an unnested collection that 
comes " +
+                "from a UNION [ALL] is not supported yet.");
+          }
+        }
+      }
+    }
+  }
+
+  // Returns the TupleDescriptors contained by 'tuple' (includes item tuple 
descs of
+  // collections).
+  private void getCollTupleDescs(TupleDescriptor tuple,
+      List<TupleDescriptor> tupleList) {
+    tupleList.add(tuple);
+    for (SlotDescriptor slot : tuple.getSlots()) {
+      if (slot.getType().isCollectionType()) {
+        TupleDescriptor itemTuple = slot.getItemTupleDesc();
+        Preconditions.checkNotNull(itemTuple);
+        tupleList.add(itemTuple);
+        // TODO: Continue recursively (for collections and probably also
+        // structs) once IMPALA-12751 is solved.
+      }
+    }
   }
 
   @Override
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
 
b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
index 13dae41ea..a874f8878 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
@@ -150,6 +150,143 @@ select 1 from (select int_array from complextypestbl) s
 tinyint
 ====
 ---- QUERY
+# Regression test for IMPALA-12695.
+with v as (select arr1 from complextypes_arrays
+  union all select arr1 from complextypes_arrays)
+select am.item from v, v.arr1 am;
+---- RESULTS
+1
+2
+3
+4
+5
+1
+NULL
+3
+4
+5
+10
+9
+8
+10
+10
+NULL
+12
+1
+2
+1
+2
+3
+1
+2
+3
+4
+5
+1
+NULL
+3
+4
+5
+10
+9
+8
+10
+10
+NULL
+12
+1
+2
+1
+2
+3
+---- TYPES
+int
+====
+---- QUERY
+# Filtering an unnested collection that comes from a UNION [ALL] is not 
supported, see
+# IMPALA-12753.
+with v as (select arr1 from complextypes_arrays
+  union all select arr1 from complextypes_arrays)
+select am.item from v, v.arr1 am where am.item is NULL;
+---- CATCH
+AnalysisException: Filtering an unnested collection that comes from a UNION 
[ALL] is not supported yet.
+====
+---- QUERY
+# Regression test for IMPALA-12695 with varlen collection item and an 
additional inline view layer.
+with v0 as (select arr2 from complextypes_arrays
+  union all select arr2 from complextypes_arrays),
+     v1 as (select am.item from v0, v0.arr2 am)
+select item from v1;
+---- RESULTS
+'one'
+'two'
+'three'
+'four'
+'five'
+'one'
+'two'
+'three'
+'NULL'
+'five'
+'ten'
+'ten'
+'nine'
+'eight'
+'ten'
+'eleven'
+'twelve'
+'thirteen'
+'str1'
+'str2'
+'str1'
+'str2'
+'one'
+'two'
+'three'
+'four'
+'five'
+'one'
+'two'
+'three'
+'NULL'
+'five'
+'ten'
+'ten'
+'nine'
+'eight'
+'ten'
+'eleven'
+'twelve'
+'thirteen'
+'str1'
+'str2'
+'str1'
+'str2'
+---- TYPES
+string
+====
+---- QUERY
+# Filtering an unnested collection that comes from a UNION [ALL] is not 
supported, see
+# IMPALA-12753.
+with v0 as (select arr2 from complextypes_arrays
+  union all select arr2 from complextypes_arrays),
+     v1 as (select am.item from v0, v0.arr2 am)
+select item from v1 where item="one";
+---- CATCH
+AnalysisException: Filtering an unnested collection that comes from a UNION 
[ALL] is not supported yet.
+====
+---- QUERY
+# Filtering an unnested collection that comes from a UNION [ALL] is not 
supported, see
+# IMPALA-12753. The query contains an extra inline view layer above the union.
+with v0 as (select arr2 from complextypes_arrays
+  union all select arr2 from complextypes_arrays),
+     v1 as (select 1, arr2 from v0),
+     v2 as (select am.item from v1, v1.arr2 am)
+select item from v2 where item="one";
+---- CATCH
+AnalysisException: Filtering an unnested collection that comes from a UNION 
[ALL] is not supported yet.
+====
+---- QUERY
 select id, int_array from (select id, int_array from complextypestbl) s;
 ---- RESULTS
 1,'[1,2,3]'
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
 
b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
index e21747c08..efec3b7c5 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
@@ -169,9 +169,6 @@ tinyint,string
 ====
 ---- QUERY
 # Constants in the select list of unions also lead to a "non-pass-through" 
union.
-select 1, int_map, int_map_array from complextypestbl
-  union all select 2, int_map, int_map_array from complextypestbl;
-
 select 1, map_1d, map_3d from collection_tbl
   union all select 2, map_1d, map_3d from collection_tbl
 ---- RESULTS
@@ -210,6 +207,40 @@ select 1 from (select int_map from complextypestbl) s
 tinyint
 ====
 ---- QUERY
+# Regression test for IMPALA-12695.
+with v0 as (select int_map from complextypestbl
+  union all select int_map from complextypestbl),
+     v1 as (select m.key from v0, v0.int_map m)
+select key from v1;
+---- RESULTS
+'k1'
+'k1'
+'k2'
+'k1'
+'k2'
+'k1'
+'k3'
+'k1'
+'k1'
+'k2'
+'k1'
+'k2'
+'k1'
+'k3'
+---- TYPES
+string
+====
+---- QUERY
+# Filtering an unnested collection that comes from a UNION [ALL] is not 
supported, see
+# IMPALA-12753.
+with v0 as (select int_map from complextypestbl
+  union all select int_map from complextypestbl),
+     v1 as (select m.key from v0, v0.int_map m)
+select key from v1 where key="k1";
+---- CATCH
+AnalysisException: Filtering an unnested collection that comes from a UNION 
[ALL] is not supported yet.
+====
+---- QUERY
 select id, int_map from (select id, int_map from complextypestbl) s;
 ---- RESULTS
 1,'{"k1":1,"k2":100}'
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-from-clause.test
 
b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-from-clause.test
index 93b3a02f1..591e36af2 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-from-clause.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-from-clause.test
@@ -409,3 +409,55 @@ where t1.id = 3 and t2.id < 3;
 3,2,8,'NULL'
 ---- TYPES
 INT,INT,INT,STRING
+====
+---- QUERY
+with v as (select arr1 from complextypes_arrays
+  union all select arr1 from complextypes_arrays)
+select arr1.item from v, unnest(v.arr1);
+---- RESULTS
+1
+2
+3
+4
+5
+1
+NULL
+3
+4
+5
+10
+9
+8
+10
+10
+NULL
+12
+1
+2
+1
+2
+3
+1
+2
+3
+4
+5
+1
+NULL
+3
+4
+5
+10
+9
+8
+10
+10
+NULL
+12
+1
+2
+1
+2
+3
+---- TYPES
+INT
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
 
b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
index e36c05acf..d471b86ac 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
@@ -230,3 +230,55 @@ where arr1.item < 3;
 10,2,2
 ---- TYPES
 INT,INT,INT
+====
+---- QUERY
+with v as (select arr1 from complextypes_arrays
+  union all select arr1 from complextypes_arrays)
+select unnest(arr1) a from v
+---- RESULTS
+1
+2
+3
+4
+5
+1
+NULL
+3
+4
+5
+10
+9
+8
+10
+10
+NULL
+12
+1
+2
+1
+2
+3
+1
+2
+3
+4
+5
+1
+NULL
+3
+4
+5
+10
+9
+8
+10
+10
+NULL
+12
+1
+2
+1
+2
+3
+---- TYPES
+INT

Reply via email to