Repository: spark
Updated Branches:
refs/heads/master 1fe08d62f -> eed9c4ef8
[SPARK-21129][SQL] Arguments of SQL function call should not be named
expressions
### What changes were proposed in this pull request?
Function argument should not be named expressions. It could cause two issues:
- Misleading error message
- Unexpected query results when the column name is `distinct`, which is not a
reserved word in our parser.
```
spark-sql> select count(distinct c1, distinct c2) from t1;
Error in query: cannot resolve '`distinct`' given input columns: [c1, c2]; line
1 pos 26;
'Project [unresolvedalias('count(c1#30, 'distinct), None)]
+- SubqueryAlias t1
+- CatalogRelation `default`.`t1`,
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#30, c2#31]
```
After the fix, the error message becomes
```
spark-sql> select count(distinct c1, distinct c2) from t1;
Error in query:
extraneous input 'c2' expecting {')', ',', '.', '[', 'OR', 'AND', 'IN', NOT,
'BETWEEN', 'LIKE', RLIKE, 'IS', EQ, '<=>', '<>', '!=', '<', LTE, '>', GTE, '+',
'-', '*', '/', '%', 'DIV', '&', '|', '||', '^'}(line 1, pos 35)
== SQL ==
select count(distinct c1, distinct c2) from t1
-----------------------------------^^^
```
### How was this patch tested?
Added a test case to parser suite.
Author: Xiao Li <[email protected]>
Author: gatorsmile <[email protected]>
Closes #18338 from gatorsmile/parserDistinctAggFunc.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eed9c4ef
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eed9c4ef
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eed9c4ef
Branch: refs/heads/master
Commit: eed9c4ef859fdb75a816a3e0ce2d593b34b23444
Parents: 1fe08d6
Author: Xiao Li <[email protected]>
Authored: Fri Jun 30 14:23:56 2017 -0700
Committer: gatorsmile <[email protected]>
Committed: Fri Jun 30 14:23:56 2017 -0700
----------------------------------------------------------------------
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 3 +-
.../apache/spark/sql/catalyst/dsl/package.scala | 1 +
.../spark/sql/catalyst/parser/AstBuilder.scala | 9 +++++-
.../catalyst/parser/ExpressionParserSuite.scala | 6 ++--
.../sql/catalyst/parser/PlanParserSuite.scala | 6 ++++
.../test/resources/sql-tests/inputs/struct.sql | 7 +++++
.../resources/sql-tests/results/struct.sql.out | 32 +++++++++++++++++++-
7 files changed, 59 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/eed9c4ef/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 9456031..7ffa150 100644
---
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -561,6 +561,7 @@ primaryExpression
| CASE whenClause+ (ELSE elseExpression=expression)? END
#searchedCase
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END
#simpleCase
| CAST '(' expression AS dataType ')'
#cast
+ | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)?
')' #struct
| FIRST '(' expression (IGNORE NULLS)? ')'
#first
| LAST '(' expression (IGNORE NULLS)? ')'
#last
| POSITION '(' substr=valueExpression IN str=valueExpression ')'
#position
@@ -569,7 +570,7 @@ primaryExpression
| qualifiedName '.' ASTERISK
#star
| '(' namedExpression (',' namedExpression)+ ')'
#rowConstructor
| '(' query ')'
#subqueryExpression
- | qualifiedName '(' (setQuantifier? namedExpression (','
namedExpression)*)? ')'
+ | qualifiedName '(' (setQuantifier? argument+=expression (','
argument+=expression)*)? ')'
(OVER windowSpec)?
#functionCall
| value=primaryExpression '[' index=valueExpression ']'
#subscript
| identifier
#columnReference
http://git-wip-us.apache.org/repos/asf/spark/blob/eed9c4ef/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index f679256..7c100af 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -170,6 +170,7 @@ package object dsl {
case Seq() => UnresolvedStar(None)
case target => UnresolvedStar(Option(target))
}
+ def namedStruct(e: Expression*): Expression = CreateNamedStruct(e)
def callFunction[T, U](
func: T => U,
http://git-wip-us.apache.org/repos/asf/spark/blob/eed9c4ef/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index ef79cbc..8eac3ef 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1062,6 +1062,13 @@ class AstBuilder(conf: SQLConf) extends
SqlBaseBaseVisitor[AnyRef] with Logging
}
/**
+ * Create a [[CreateStruct]] expression.
+ */
+ override def visitStruct(ctx: StructContext): Expression = withOrigin(ctx) {
+ CreateStruct(ctx.argument.asScala.map(expression))
+ }
+
+ /**
* Create a [[First]] expression.
*/
override def visitFirst(ctx: FirstContext): Expression = withOrigin(ctx) {
@@ -1091,7 +1098,7 @@ class AstBuilder(conf: SQLConf) extends
SqlBaseBaseVisitor[AnyRef] with Logging
// Create the function call.
val name = ctx.qualifiedName.getText
val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
- val arguments = ctx.namedExpression().asScala.map(expression) match {
+ val arguments = ctx.argument.asScala.map(expression) match {
case Seq(UnresolvedStar(None))
if name.toLowerCase(Locale.ROOT) == "count" && !isDistinct =>
// Transform COUNT(*) into COUNT(1).
http://git-wip-us.apache.org/repos/asf/spark/blob/eed9c4ef/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 4d08f01..45f9f72 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -231,7 +231,7 @@ class ExpressionParserSuite extends PlanTest {
assertEqual("foo(distinct a, b)", 'foo.distinctFunction('a, 'b))
assertEqual("grouping(distinct a, b)", 'grouping.distinctFunction('a, 'b))
assertEqual("`select`(all a, b)", 'select.function('a, 'b))
- assertEqual("foo(a as x, b as e)", 'foo.function('a as 'x, 'b as 'e))
+ intercept("foo(a x)", "extraneous input 'x'")
}
test("window function expressions") {
@@ -330,7 +330,9 @@ class ExpressionParserSuite extends PlanTest {
assertEqual("a.b", UnresolvedAttribute("a.b"))
assertEqual("`select`.b", UnresolvedAttribute("select.b"))
assertEqual("(a + b).b", ('a + 'b).getField("b")) // This will fail
analysis.
- assertEqual("struct(a, b).b", 'struct.function('a, 'b).getField("b"))
+ assertEqual(
+ "struct(a, b).b",
+ namedStruct(NamePlaceholder, 'a, NamePlaceholder, 'b).getField("b"))
}
test("reference") {
http://git-wip-us.apache.org/repos/asf/spark/blob/eed9c4ef/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index bf15b85..5b2573f 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -223,6 +223,12 @@ class PlanParserSuite extends AnalysisTest {
assertEqual(s"$sql grouping sets((a, b), (a), ())",
GroupingSets(Seq(Seq('a, 'b), Seq('a), Seq()), Seq('a, 'b), table("d"),
Seq('a, 'b, 'sum.function('c).as("c"))))
+
+ val m = intercept[ParseException] {
+ parsePlan("SELECT a, b, count(distinct a, distinct b) as c FROM d GROUP
BY a, b")
+ }.getMessage
+ assert(m.contains("extraneous input 'b'"))
+
}
test("limit") {
http://git-wip-us.apache.org/repos/asf/spark/blob/eed9c4ef/sql/core/src/test/resources/sql-tests/inputs/struct.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/struct.sql
b/sql/core/src/test/resources/sql-tests/inputs/struct.sql
index e56344d..93a1238 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/struct.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/struct.sql
@@ -18,3 +18,10 @@ SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM
tbl_x;
-- Prepend a column to a struct
SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x;
+
+-- Select a column from a struct
+SELECT ID, STRUCT(ST.*).C NST FROM tbl_x;
+SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x;
+
+-- Select an alias from a struct
+SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/spark/blob/eed9c4ef/sql/core/src/test/resources/sql-tests/results/struct.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/struct.sql.out
b/sql/core/src/test/resources/sql-tests/results/struct.sql.out
index 3e32f46..1da33bc 100644
--- a/sql/core/src/test/resources/sql-tests/results/struct.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/struct.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 6
+-- Number of queries: 9
-- !query 0
@@ -58,3 +58,33 @@ struct<ID:int,NST:struct<AA:string,C:string,D:string>>
1 {"AA":"1","C":"gamma","D":"delta"}
2 {"AA":"2","C":"epsilon","D":"eta"}
3 {"AA":"3","C":"theta","D":"iota"}
+
+
+-- !query 6
+SELECT ID, STRUCT(ST.*).C NST FROM tbl_x
+-- !query 6 schema
+struct<ID:int,NST:string>
+-- !query 6 output
+1 gamma
+2 epsilon
+3 theta
+
+
+-- !query 7
+SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x
+-- !query 7 schema
+struct<ID:int,NST:string>
+-- !query 7 output
+1 delta
+2 eta
+3 iota
+
+
+-- !query 8
+SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x
+-- !query 8 schema
+struct<ID:int,named_struct(STC, ST.C AS `C` AS `STC`, STD, ST.D AS `D` AS
`STD`).STD:string>
+-- !query 8 output
+1 delta
+2 eta
+3 iota
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]