Repository: incubator-impala
Updated Branches:
  refs/heads/master 2618766ba -> d0ff27cc3


IMPALA-3938: Disallow implicit references between nested collections.

The Bug: Prior to this patch Impala would accept and execute a query of
the form "select pos, key, value from tbl.arr_map_col order by pos asc",
where "arr_map_col" is an array of map<?, ?>. In this example, the map
is not referenced as a collection and therefore the implicit rule of
using "item" should not be applied. The results produced from this
erroneous query are arbitrary. The semantically correct way of
expressing the query would be of the form "select a.pos, m.key, m.value
from tbl t, t.arr_map_col a, a.item m order by a.pos asc".

The Fix: Removed the existing 'expectExplicitMatch' flag to simplify the
code. During query analysis (more specifically: path resolution), if
part of a path cannot be explicitly matched then the planner will fall
back to an implicit field check. As part of this check, the type of the
nested field inside the collection will be inspected to verify that it
does not match that of a collection type. If so, the path resolution will
fail and an AnalysisException will be thrown indicating that the path
cannot be resolved. For example, trying to execute the query "select pos,
key, value from tbl.arr_map_col order by pos asc" would yield the
following error "Could not resolve column/field reference: 'key'".
Regarding test coverage, the existing test suite "AnalyzeStmtsTest" has
been augmented with a set of new tests that will verify that implicit
references between collections cannot be used.

Change-Id: I703c7e9c8c9c95c9fee714767367373eec10cd0c
Reviewed-on: http://gerrit.cloudera.org:8080/4079
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d0ff27cc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d0ff27cc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d0ff27cc

Branch: refs/heads/master
Commit: d0ff27cc3a7a693ae412fcbf9f8f1709de013855
Parents: 2618766
Author: Christopher Channing <[email protected]>
Authored: Tue Aug 16 14:56:55 2016 +0100
Committer: Internal Jenkins <[email protected]>
Committed: Sat Aug 27 02:21:34 2016 +0000

----------------------------------------------------------------------
 .../java/com/cloudera/impala/analysis/Path.java | 10 +---
 .../impala/analysis/AnalyzeStmtsTest.java       | 60 ++++++++++++++------
 2 files changed, 45 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0ff27cc/fe/src/main/java/com/cloudera/impala/analysis/Path.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Path.java 
b/fe/src/main/java/com/cloudera/impala/analysis/Path.java
index 4e0b128..03c601c 100644
--- a/fe/src/main/java/com/cloudera/impala/analysis/Path.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/Path.java
@@ -215,8 +215,6 @@ public class Path {
       currentType = rootTable_.getType().getItemType();
     }
 
-    // True if the next raw-path element must match explicitly.
-    boolean expectExplicitMatch = false;
     // Map all remaining raw-path elements to field types and positions.
     while (rawPathIdx < rawPath_.size()) {
       if (!currentType.isComplexType()) return false;
@@ -225,8 +223,10 @@ public class Path {
       StructField field = structType.getField(rawPath_.get(rawPathIdx));
       if (field == null) {
         // Resolve implicit path.
-        if (!expectExplicitMatch && structType instanceof 
CollectionStructType) {
+        if (structType instanceof CollectionStructType) {
           field = ((CollectionStructType) structType).getOptionalField();
+          // Collections must be matched explicitly.
+          if (field.getType().isCollectionType()) return false;
         } else {
           // Failed to resolve implicit or explicit path.
           return false;
@@ -235,13 +235,9 @@ public class Path {
         matchedTypes_.add(field.getType());
         matchedPositions_.add(field.getPosition());
         currentType = field.getType();
-        // After an implicit match there must be an explicit match.
-        expectExplicitMatch = true;
         // Do not consume a raw-path element.
         continue;
       }
-      // After an explicit match we could have an implicit match again.
-      expectExplicitMatch = false;
       matchedTypes_.add(field.getType());
       matchedPositions_.add(field.getPosition());
       if (field.getType().isCollectionType() && firstCollectionPathIdx_ == -1) 
{

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0ff27cc/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
----------------------------------------------------------------------
diff --git 
a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java 
b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
index 33fd6ac..05a0855 100644
--- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
@@ -575,24 +575,48 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
     testTableRefPath("select 1 from d.t7, t7.c5, c5.m2.m3",
         path(4, 1, 1, 1, 2), path(4, 1, 1));
 
-    // Tests that an attempted implicit match must be succeeded by an explicit 
match.
-    addTestTable("create table d.t8 (" +
-        "s1 struct<" +
-        "  s2:struct<" +
-        "    a:array<" +
-        "      array<struct<" +
-        "        e:int,f:string>>>>>)");
-    // Explanation of test:
-    // - d.t8.s1.s2.a resolves to a CollectionStructType with fields 'item' 
and 'pos'
-    // - we are allowed to implicitly skip the 'item' field
-    // - d.t8.s1.s2.a.item again resolves to a CollectionStructType with 
'item' and 'pos'
-    // - however, we are not allowed to implicitly skip 'item' again, since we 
have
-    //   already skipped 'item' previously
-    // - the rule is: an implicit match must be followed by an explicit one
-    AnalysisError("select f from d.t8.s1.s2.a",
-        "Could not resolve column/field reference: 'f'");
-    AnalysisError("select 1 from d.t8.s1.s2.a, a.f",
-        "Could not resolve table reference: 'a.f'");
+    // Tests that implicit references are not allowed through collection types.
+    addTestTable("create table d.t8 ("
+        + "c1 array<map<string, string>>,"
+        + "c2 map<string, array<struct<a:int>>>,"
+        + "c3 struct<s1:struct<a:array<array<struct<e:int, f:string>>>>>)");
+    testImplicitPathFailure("d.t8", true, "c1", "key", "value");
+    testImplicitPathFailure("d.t8", true, "c2", "pos");
+    testImplicitPathFailure("d.t8.c3.s1", false, "a", "f");
+  }
+
+  private void testImplicitPathFailure(String parent, boolean 
parentIsCollection,
+      String collection, String...fields) {
+    String[] parentElements = parent.split("\\.");
+    String implicitAlias = parentElements[parentElements.length - 1];
+    String explicitAlias = "x";
+    for (String field : fields) {
+      // Tests that the path in the select list item does not resolve.
+      AnalysisError(String.format("select %s from %s.%s", field, parent, 
collection),
+          String.format("Could not resolve column/field reference: '%s'", 
field));
+      AnalysisError(String.format("select %s.%s from %s.%s %s",
+          explicitAlias, field, parent, collection, explicitAlias),
+          String.format("Could not resolve column/field reference: '%s.%s'",
+          explicitAlias, field));
+      if (parentIsCollection) {
+        AnalysisError(String.format("select %s from %s join %s.%s",
+            field, parent, implicitAlias, collection),
+            String.format("Could not resolve column/field reference: '%s'", 
field));
+        AnalysisError(String.format("select %s.%s from %s %s join %s.%s",
+            explicitAlias, field, parent, explicitAlias, explicitAlias, 
collection),
+            String.format("Could not resolve column/field reference: '%s.%s'",
+            explicitAlias, field));
+      }
+      // Tests that the path in the last table reference does not resolve.
+      AnalysisError(String.format("select 1 from %s.%s join %s.%s",
+          parent, collection, collection, field),
+          String.format("Could not resolve table reference: '%s.%s'",
+          collection, field));
+      AnalysisError(String.format("select 1 from %s.%s %s join %s.%s",
+          parent, collection, explicitAlias, explicitAlias, field),
+          String.format("Could not resolve table reference: '%s.%s'",
+          explicitAlias, field));
+    }
   }
 
   @Test

Reply via email to