spark git commit: [SPARK-5278][SQL] Introduce UnresolvedGetField and complete the check of ambiguous reference to fields
Repository: spark Updated Branches: refs/heads/branch-1.3 d82260606 - 1b148adfc [SPARK-5278][SQL] Introduce UnresolvedGetField and complete the check of ambiguous reference to fields When the `GetField` chain(`a.b.c.d.`) is interrupted by `GetItem` like `a.b[0].c.d`, then the check of ambiguous reference to fields is broken. The reason is that: for something like `a.b[0].c.d`, we first parse it to `GetField(GetField(GetItem(Unresolved(a.b), 0), c), d)`. Then in `LogicalPlan#resolve`, we resolve `a.b` and build a `GetField` chain from bottom(the relation). But for the 2 outer `GetFiled`, we have to resolve them in `Analyzer` or do it in `GetField` lazily, check data type of child, search needed field, etc. which is similar to what we have done in `LogicalPlan#resolve`. So in this PR, the fix is just copy the same logic in `LogicalPlan#resolve` to `Analyzer`, which is simple and quick, but I do suggest introduce `UnresolvedGetFiled` like I explained in https://github.com/apache/spark/pull/2405. Author: Wenchen Fan cloud0...@outlook.com Closes #4068 from cloud-fan/simple and squashes the following commits: a6857b5 [Wenchen Fan] fix import order 8411c40 [Wenchen Fan] use UnresolvedGetField (cherry picked from commit 4793c8402a19afe4df51129a7f99e07494a76af2) Signed-off-by: Michael Armbrust mich...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1b148adf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1b148adf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1b148adf Branch: refs/heads/branch-1.3 Commit: 1b148adfc583ac405e37d177ce6ec6b0bc13e381 Parents: d822606 Author: Wenchen Fan cloud0...@outlook.com Authored: Fri Feb 6 13:08:09 2015 -0800 Committer: Michael Armbrust mich...@databricks.com Committed: Fri Feb 6 13:08:20 2015 -0800 -- .../apache/spark/sql/catalyst/SqlParser.scala | 2 +- .../spark/sql/catalyst/analysis/Analyzer.scala | 34 ++-- .../sql/catalyst/analysis/unresolved.scala | 12 +++ .../apache/spark/sql/catalyst/dsl/package.scala | 4 +-- .../sql/catalyst/expressions/complexTypes.scala | 24 ++ .../sql/catalyst/optimizer/Optimizer.scala | 2 +- .../catalyst/plans/logical/LogicalPlan.scala| 33 ++- .../expressions/ExpressionEvaluationSuite.scala | 25 ++ .../optimizer/ConstantFoldingSuite.scala| 4 +-- .../scala/org/apache/spark/sql/Column.scala | 3 +- .../org/apache/spark/sql/hive/HiveQl.scala | 2 +- .../hive/execution/HiveResolutionSuite.scala| 27 ++-- 12 files changed, 84 insertions(+), 88 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1b148adf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 1c588ee..124f083 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -372,7 +372,7 @@ class SqlParser extends AbstractSparkSQLParser { | expression ~ ([ ~ expression ~ ]) ^^ { case base ~ ordinal = GetItem(base, ordinal) } | (expression ~ .) ~ ident ^^ - { case base ~ fieldName = GetField(base, fieldName) } + { case base ~ fieldName = UnresolvedGetField(base, fieldName) } | cast | ( ~ expression ~ ) | function http://git-wip-us.apache.org/repos/asf/spark/blob/1b148adf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ae1aee0..0b59ed1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -285,7 +285,7 @@ class Analyzer(catalog: Catalog, case q: LogicalPlan = logTrace(sAttempting to resolve ${q.simpleString}) -q transformExpressions { +q transformExpressionsUp { case u @ UnresolvedAttribute(name) if resolver(name, VirtualColumn.groupingIdName) q.isInstanceOf[GroupingAnalytics] = // Resolve the virtual column GROUPING__ID for the operator GroupingAnalytics @@ -295,15 +295,8 @@ class Analyzer(catalog: Catalog, val result = q.resolveChildren(name, resolver).getOrElse(u)
spark git commit: [SPARK-5278][SQL] Introduce UnresolvedGetField and complete the check of ambiguous reference to fields
Repository: spark Updated Branches: refs/heads/master bc3635608 - 4793c8402 [SPARK-5278][SQL] Introduce UnresolvedGetField and complete the check of ambiguous reference to fields When the `GetField` chain(`a.b.c.d.`) is interrupted by `GetItem` like `a.b[0].c.d`, then the check of ambiguous reference to fields is broken. The reason is that: for something like `a.b[0].c.d`, we first parse it to `GetField(GetField(GetItem(Unresolved(a.b), 0), c), d)`. Then in `LogicalPlan#resolve`, we resolve `a.b` and build a `GetField` chain from bottom(the relation). But for the 2 outer `GetFiled`, we have to resolve them in `Analyzer` or do it in `GetField` lazily, check data type of child, search needed field, etc. which is similar to what we have done in `LogicalPlan#resolve`. So in this PR, the fix is just copy the same logic in `LogicalPlan#resolve` to `Analyzer`, which is simple and quick, but I do suggest introduce `UnresolvedGetFiled` like I explained in https://github.com/apache/spark/pull/2405. Author: Wenchen Fan cloud0...@outlook.com Closes #4068 from cloud-fan/simple and squashes the following commits: a6857b5 [Wenchen Fan] fix import order 8411c40 [Wenchen Fan] use UnresolvedGetField Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4793c840 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4793c840 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4793c840 Branch: refs/heads/master Commit: 4793c8402a19afe4df51129a7f99e07494a76af2 Parents: bc36356 Author: Wenchen Fan cloud0...@outlook.com Authored: Fri Feb 6 13:08:09 2015 -0800 Committer: Michael Armbrust mich...@databricks.com Committed: Fri Feb 6 13:08:09 2015 -0800 -- .../apache/spark/sql/catalyst/SqlParser.scala | 2 +- .../spark/sql/catalyst/analysis/Analyzer.scala | 34 ++-- .../sql/catalyst/analysis/unresolved.scala | 12 +++ .../apache/spark/sql/catalyst/dsl/package.scala | 4 +-- .../sql/catalyst/expressions/complexTypes.scala | 24 ++ .../sql/catalyst/optimizer/Optimizer.scala | 2 +- .../catalyst/plans/logical/LogicalPlan.scala| 33 ++- .../expressions/ExpressionEvaluationSuite.scala | 25 ++ .../optimizer/ConstantFoldingSuite.scala| 4 +-- .../scala/org/apache/spark/sql/Column.scala | 3 +- .../org/apache/spark/sql/hive/HiveQl.scala | 2 +- .../hive/execution/HiveResolutionSuite.scala| 27 ++-- 12 files changed, 84 insertions(+), 88 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4793c840/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 1c588ee..124f083 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -372,7 +372,7 @@ class SqlParser extends AbstractSparkSQLParser { | expression ~ ([ ~ expression ~ ]) ^^ { case base ~ ordinal = GetItem(base, ordinal) } | (expression ~ .) ~ ident ^^ - { case base ~ fieldName = GetField(base, fieldName) } + { case base ~ fieldName = UnresolvedGetField(base, fieldName) } | cast | ( ~ expression ~ ) | function http://git-wip-us.apache.org/repos/asf/spark/blob/4793c840/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ae1aee0..0b59ed1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -285,7 +285,7 @@ class Analyzer(catalog: Catalog, case q: LogicalPlan = logTrace(sAttempting to resolve ${q.simpleString}) -q transformExpressions { +q transformExpressionsUp { case u @ UnresolvedAttribute(name) if resolver(name, VirtualColumn.groupingIdName) q.isInstanceOf[GroupingAnalytics] = // Resolve the virtual column GROUPING__ID for the operator GroupingAnalytics @@ -295,15 +295,8 @@ class Analyzer(catalog: Catalog, val result = q.resolveChildren(name, resolver).getOrElse(u) logDebug(sResolving $u to $result) result - - // Resolve field names using the resolver. - case f @