Repository: incubator-impala Updated Branches: refs/heads/master 2618766ba -> d0ff27cc3
IMPALA-3938: Disallow implicit references between nested collections. The Bug: Prior to this patch Impala would accept and execute a query of the form "select pos, key, value from tbl.arr_map_col order by pos asc", where "arr_map_col" is an array of map<?, ?>. In this example, the map is not referenced as a collection and therefore the implicit rule of using "item" should not be applied. The results produced from this erroneous query are arbitrary. The semantically correct way of expressing the query would be of the form "select a.pos, m.key, m.value from tbl t, t.arr_map_col a, a.item m order by a.pos asc". The Fix: Removed the existing 'expectExplicitMatch' flag to simplify the code. During query analysis (more specifically: path resolution), if part of a path cannot be explicitly matched then the planner will fall back to an implicit field check. As part of this check, the type of the nested field inside the collection will be inspected to verify that it does not match that of a collection type. If so, the path resolution will fail and an AnalysisException will be thrown indicating that the path cannot be resolved. For example, trying to execute the query "select pos, key, value from tbl.arr_map_col order by pos asc" would yield the following error "Could not resolve column/field reference: 'key'". Regarding test coverage, the existing test suite "AnalyzeStmtsTest" has been augmented with a set of new tests that will verify that implicit references between collections cannot be used. Change-Id: I703c7e9c8c9c95c9fee714767367373eec10cd0c Reviewed-on: http://gerrit.cloudera.org:8080/4079 Reviewed-by: Alex Behm <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d0ff27cc Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d0ff27cc Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d0ff27cc Branch: refs/heads/master Commit: d0ff27cc3a7a693ae412fcbf9f8f1709de013855 Parents: 2618766 Author: Christopher Channing <[email protected]> Authored: Tue Aug 16 14:56:55 2016 +0100 Committer: Internal Jenkins <[email protected]> Committed: Sat Aug 27 02:21:34 2016 +0000 ---------------------------------------------------------------------- .../java/com/cloudera/impala/analysis/Path.java | 10 +--- .../impala/analysis/AnalyzeStmtsTest.java | 60 ++++++++++++++------ 2 files changed, 45 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0ff27cc/fe/src/main/java/com/cloudera/impala/analysis/Path.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Path.java b/fe/src/main/java/com/cloudera/impala/analysis/Path.java index 4e0b128..03c601c 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/Path.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/Path.java @@ -215,8 +215,6 @@ public class Path { currentType = rootTable_.getType().getItemType(); } - // True if the next raw-path element must match explicitly. - boolean expectExplicitMatch = false; // Map all remaining raw-path elements to field types and positions. while (rawPathIdx < rawPath_.size()) { if (!currentType.isComplexType()) return false; @@ -225,8 +223,10 @@ public class Path { StructField field = structType.getField(rawPath_.get(rawPathIdx)); if (field == null) { // Resolve implicit path. - if (!expectExplicitMatch && structType instanceof CollectionStructType) { + if (structType instanceof CollectionStructType) { field = ((CollectionStructType) structType).getOptionalField(); + // Collections must be matched explicitly. + if (field.getType().isCollectionType()) return false; } else { // Failed to resolve implicit or explicit path. return false; @@ -235,13 +235,9 @@ public class Path { matchedTypes_.add(field.getType()); matchedPositions_.add(field.getPosition()); currentType = field.getType(); - // After an implicit match there must be an explicit match. - expectExplicitMatch = true; // Do not consume a raw-path element. continue; } - // After an explicit match we could have an implicit match again. - expectExplicitMatch = false; matchedTypes_.add(field.getType()); matchedPositions_.add(field.getPosition()); if (field.getType().isCollectionType() && firstCollectionPathIdx_ == -1) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0ff27cc/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java index 33fd6ac..05a0855 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java @@ -575,24 +575,48 @@ public class AnalyzeStmtsTest extends AnalyzerTest { testTableRefPath("select 1 from d.t7, t7.c5, c5.m2.m3", path(4, 1, 1, 1, 2), path(4, 1, 1)); - // Tests that an attempted implicit match must be succeeded by an explicit match. - addTestTable("create table d.t8 (" + - "s1 struct<" + - " s2:struct<" + - " a:array<" + - " array<struct<" + - " e:int,f:string>>>>>)"); - // Explanation of test: - // - d.t8.s1.s2.a resolves to a CollectionStructType with fields 'item' and 'pos' - // - we are allowed to implicitly skip the 'item' field - // - d.t8.s1.s2.a.item again resolves to a CollectionStructType with 'item' and 'pos' - // - however, we are not allowed to implicitly skip 'item' again, since we have - // already skipped 'item' previously - // - the rule is: an implicit match must be followed by an explicit one - AnalysisError("select f from d.t8.s1.s2.a", - "Could not resolve column/field reference: 'f'"); - AnalysisError("select 1 from d.t8.s1.s2.a, a.f", - "Could not resolve table reference: 'a.f'"); + // Tests that implicit references are not allowed through collection types. + addTestTable("create table d.t8 (" + + "c1 array<map<string, string>>," + + "c2 map<string, array<struct<a:int>>>," + + "c3 struct<s1:struct<a:array<array<struct<e:int, f:string>>>>>)"); + testImplicitPathFailure("d.t8", true, "c1", "key", "value"); + testImplicitPathFailure("d.t8", true, "c2", "pos"); + testImplicitPathFailure("d.t8.c3.s1", false, "a", "f"); + } + + private void testImplicitPathFailure(String parent, boolean parentIsCollection, + String collection, String...fields) { + String[] parentElements = parent.split("\\."); + String implicitAlias = parentElements[parentElements.length - 1]; + String explicitAlias = "x"; + for (String field : fields) { + // Tests that the path in the select list item does not resolve. + AnalysisError(String.format("select %s from %s.%s", field, parent, collection), + String.format("Could not resolve column/field reference: '%s'", field)); + AnalysisError(String.format("select %s.%s from %s.%s %s", + explicitAlias, field, parent, collection, explicitAlias), + String.format("Could not resolve column/field reference: '%s.%s'", + explicitAlias, field)); + if (parentIsCollection) { + AnalysisError(String.format("select %s from %s join %s.%s", + field, parent, implicitAlias, collection), + String.format("Could not resolve column/field reference: '%s'", field)); + AnalysisError(String.format("select %s.%s from %s %s join %s.%s", + explicitAlias, field, parent, explicitAlias, explicitAlias, collection), + String.format("Could not resolve column/field reference: '%s.%s'", + explicitAlias, field)); + } + // Tests that the path in the last table reference does not resolve. + AnalysisError(String.format("select 1 from %s.%s join %s.%s", + parent, collection, collection, field), + String.format("Could not resolve table reference: '%s.%s'", + collection, field)); + AnalysisError(String.format("select 1 from %s.%s %s join %s.%s", + parent, collection, explicitAlias, explicitAlias, field), + String.format("Could not resolve table reference: '%s.%s'", + explicitAlias, field)); + } } @Test
