Repository: spark Updated Branches: refs/heads/master 92ea7fd7b -> b526f70c1
[SPARK-19951][SQL] Add string concatenate operator || to Spark SQL ## What changes were proposed in this pull request? This pr added code to support `||` for string concatenation. This string operation is supported in PostgreSQL and MySQL. ## How was this patch tested? Added tests in `SparkSqlParserSuite` Author: Takeshi Yamamuro <[email protected]> Closes #17711 from maropu/SPARK-19951. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b526f70c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b526f70c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b526f70c Branch: refs/heads/master Commit: b526f70c16caa2062a77117808860e1d3662114c Parents: 92ea7fd Author: Takeshi Yamamuro <[email protected]> Authored: Fri May 12 09:55:51 2017 -0700 Committer: Xiao Li <[email protected]> Committed: Fri May 12 09:55:51 2017 -0700 ---------------------------------------------------------------------- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 5 +- .../spark/sql/catalyst/parser/AstBuilder.scala | 2 + .../resources/sql-tests/inputs/arithmetic.sql | 34 --- .../resources/sql-tests/inputs/operators.sql | 55 ++++ .../sql-tests/inputs/string-functions.sql | 3 + .../sql-tests/results/arithmetic.sql.out | 226 --------------- .../sql-tests/results/operators.sql.out | 286 +++++++++++++++++++ .../sql-tests/results/string-functions.sql.out | 10 +- .../sql/execution/SparkSqlParserSuite.scala | 15 +- 9 files changed, 371 insertions(+), 265 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/b526f70c/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index ed5450b..2b7c290 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -549,7 +549,7 @@ valueExpression : primaryExpression #valueExpressionDefault | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary - | left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary + | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary @@ -590,7 +590,7 @@ comparisonOperator ; arithmeticOperator - : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | HAT + : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT ; predicateOperator @@ -869,6 +869,7 @@ DIV: 'DIV'; TILDE: '~'; AMPERSAND: '&'; PIPE: '|'; +CONCAT_PIPE: '||'; HAT: '^'; PERCENTLIT: 'PERCENT'; http://git-wip-us.apache.org/repos/asf/spark/blob/b526f70c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 4b11b6f..0e974a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1010,6 +1010,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging Add(left, right) case SqlBaseParser.MINUS => Subtract(left, right) + case SqlBaseParser.CONCAT_PIPE => + Concat(left :: right :: Nil) case SqlBaseParser.AMPERSAND => BitwiseAnd(left, right) case SqlBaseParser.HAT => http://git-wip-us.apache.org/repos/asf/spark/blob/b526f70c/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql deleted file mode 100644 index f62b10c..0000000 --- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql +++ /dev/null @@ -1,34 +0,0 @@ - --- unary minus and plus -select -100; -select +230; -select -5.2; -select +6.8e0; -select -key, +key from testdata where key = 2; -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1; -select -max(key), +max(key) from testdata; -select - (-10); -select + (-key) from testdata where key = 32; -select - (+max(key)) from testdata; -select - - 3; -select - + 20; -select + + 100; -select - - max(key) from testdata; -select + - key from testdata where key = 33; - --- div -select 5 / 2; -select 5 / 0; -select 5 / null; -select null / 5; -select 5 div 2; -select 5 div 0; -select 5 div null; -select null div 5; - --- other arithmetics -select 1 + 2; -select 1 - 2; -select 2 * 5; -select 5 % 3; -select pmod(-7, 3); http://git-wip-us.apache.org/repos/asf/spark/blob/b526f70c/sql/core/src/test/resources/sql-tests/inputs/operators.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql new file mode 100644 index 0000000..6339d69 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -0,0 +1,55 @@ + +-- unary minus and plus +select -100; +select +230; +select -5.2; +select +6.8e0; +select -key, +key from testdata where key = 2; +select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1; +select -max(key), +max(key) from testdata; +select - (-10); +select + (-key) from testdata where key = 32; +select - (+max(key)) from testdata; +select - - 3; +select - + 20; +select + + 100; +select - - max(key) from testdata; +select + - key from testdata where key = 33; + +-- div +select 5 / 2; +select 5 / 0; +select 5 / null; +select null / 5; +select 5 div 2; +select 5 div 0; +select 5 div null; +select null div 5; + +-- other arithmetics +select 1 + 2; +select 1 - 2; +select 2 * 5; +select 5 % 3; +select pmod(-7, 3); + +-- check operator precedence. +-- We follow Oracle operator precedence in the table below that lists the levels of precedence +-- among SQL operators from high to low: +------------------------------------------------------------------------------------------ +-- Operator Operation +------------------------------------------------------------------------------------------ +-- +, - identity, negation +-- *, / multiplication, division +-- +, -, || addition, subtraction, concatenation +-- =, !=, <, >, <=, >=, IS NULL, LIKE, BETWEEN, IN comparison +-- NOT exponentiation, logical negation +-- AND conjunction +-- OR disjunction +------------------------------------------------------------------------------------------ +explain select 'a' || 1 + 2; +explain select 1 - 2 || 'b'; +explain select 2 * 4 + 3 || 'b'; +explain select 3 + 1 || 'a' || 4 / 2; +explain select 1 == 1 OR 'a' || 'b' == 'ab'; +explain select 'a' || 'c' == 'ac' AND 2 == 3; http://git-wip-us.apache.org/repos/asf/spark/blob/b526f70c/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index f21981e..7005caf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -1,3 +1,6 @@ -- Argument number exception select concat_ws(); select format_string(); + +-- A pipe operator for string concatenation +select 'a' || 'b' || 'c'; http://git-wip-us.apache.org/repos/asf/spark/blob/b526f70c/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out deleted file mode 100644 index ce42c01..0000000 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ /dev/null @@ -1,226 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 28 - - --- !query 0 -select -100 --- !query 0 schema -struct<-100:int> --- !query 0 output --100 - - --- !query 1 -select +230 --- !query 1 schema -struct<230:int> --- !query 1 output -230 - - --- !query 2 -select -5.2 --- !query 2 schema -struct<-5.2:decimal(2,1)> --- !query 2 output --5.2 - - --- !query 3 -select +6.8e0 --- !query 3 schema -struct<6.8:decimal(2,1)> --- !query 3 output -6.8 - - --- !query 4 -select -key, +key from testdata where key = 2 --- !query 4 schema -struct<(- key):int,key:int> --- !query 4 output --2 2 - - --- !query 5 -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1 --- !query 5 schema -struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> --- !query 5 output --2 0 6 - - --- !query 6 -select -max(key), +max(key) from testdata --- !query 6 schema -struct<(- max(key)):int,max(key):int> --- !query 6 output --100 100 - - --- !query 7 -select - (-10) --- !query 7 schema -struct<(- -10):int> --- !query 7 output -10 - - --- !query 8 -select + (-key) from testdata where key = 32 --- !query 8 schema -struct<(- key):int> --- !query 8 output --32 - - --- !query 9 -select - (+max(key)) from testdata --- !query 9 schema -struct<(- max(key)):int> --- !query 9 output --100 - - --- !query 10 -select - - 3 --- !query 10 schema -struct<(- -3):int> --- !query 10 output -3 - - --- !query 11 -select - + 20 --- !query 11 schema -struct<(- 20):int> --- !query 11 output --20 - - --- !query 12 -select + + 100 --- !query 12 schema -struct<100:int> --- !query 12 output -100 - - --- !query 13 -select - - max(key) from testdata --- !query 13 schema -struct<(- (- max(key))):int> --- !query 13 output -100 - - --- !query 14 -select + - key from testdata where key = 33 --- !query 14 schema -struct<(- key):int> --- !query 14 output --33 - - --- !query 15 -select 5 / 2 --- !query 15 schema -struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double> --- !query 15 output -2.5 - - --- !query 16 -select 5 / 0 --- !query 16 schema -struct<(CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)):double> --- !query 16 output -NULL - - --- !query 17 -select 5 / null --- !query 17 schema -struct<(CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)):double> --- !query 17 output -NULL - - --- !query 18 -select null / 5 --- !query 18 schema -struct<(CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)):double> --- !query 18 output -NULL - - --- !query 19 -select 5 div 2 --- !query 19 schema -struct<CAST((CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)) AS BIGINT):bigint> --- !query 19 output -2 - - --- !query 20 -select 5 div 0 --- !query 20 schema -struct<CAST((CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)) AS BIGINT):bigint> --- !query 20 output -NULL - - --- !query 21 -select 5 div null --- !query 21 schema -struct<CAST((CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)) AS BIGINT):bigint> --- !query 21 output -NULL - - --- !query 22 -select null div 5 --- !query 22 schema -struct<CAST((CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)) AS BIGINT):bigint> --- !query 22 output -NULL - - --- !query 23 -select 1 + 2 --- !query 23 schema -struct<(1 + 2):int> --- !query 23 output -3 - - --- !query 24 -select 1 - 2 --- !query 24 schema -struct<(1 - 2):int> --- !query 24 output --1 - - --- !query 25 -select 2 * 5 --- !query 25 schema -struct<(2 * 5):int> --- !query 25 output -10 - - --- !query 26 -select 5 % 3 --- !query 26 schema -struct<(5 % 3):int> --- !query 26 output -2 - - --- !query 27 -select pmod(-7, 3) --- !query 27 schema -struct<pmod(-7, 3):int> --- !query 27 output -2 http://git-wip-us.apache.org/repos/asf/spark/blob/b526f70c/sql/core/src/test/resources/sql-tests/results/operators.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out new file mode 100644 index 0000000..e0236f4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -0,0 +1,286 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 34 + + +-- !query 0 +select -100 +-- !query 0 schema +struct<-100:int> +-- !query 0 output +-100 + + +-- !query 1 +select +230 +-- !query 1 schema +struct<230:int> +-- !query 1 output +230 + + +-- !query 2 +select -5.2 +-- !query 2 schema +struct<-5.2:decimal(2,1)> +-- !query 2 output +-5.2 + + +-- !query 3 +select +6.8e0 +-- !query 3 schema +struct<6.8:decimal(2,1)> +-- !query 3 output +6.8 + + +-- !query 4 +select -key, +key from testdata where key = 2 +-- !query 4 schema +struct<(- key):int,key:int> +-- !query 4 output +-2 2 + + +-- !query 5 +select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1 +-- !query 5 schema +struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> +-- !query 5 output +-2 0 6 + + +-- !query 6 +select -max(key), +max(key) from testdata +-- !query 6 schema +struct<(- max(key)):int,max(key):int> +-- !query 6 output +-100 100 + + +-- !query 7 +select - (-10) +-- !query 7 schema +struct<(- -10):int> +-- !query 7 output +10 + + +-- !query 8 +select + (-key) from testdata where key = 32 +-- !query 8 schema +struct<(- key):int> +-- !query 8 output +-32 + + +-- !query 9 +select - (+max(key)) from testdata +-- !query 9 schema +struct<(- max(key)):int> +-- !query 9 output +-100 + + +-- !query 10 +select - - 3 +-- !query 10 schema +struct<(- -3):int> +-- !query 10 output +3 + + +-- !query 11 +select - + 20 +-- !query 11 schema +struct<(- 20):int> +-- !query 11 output +-20 + + +-- !query 12 +select + + 100 +-- !query 12 schema +struct<100:int> +-- !query 12 output +100 + + +-- !query 13 +select - - max(key) from testdata +-- !query 13 schema +struct<(- (- max(key))):int> +-- !query 13 output +100 + + +-- !query 14 +select + - key from testdata where key = 33 +-- !query 14 schema +struct<(- key):int> +-- !query 14 output +-33 + + +-- !query 15 +select 5 / 2 +-- !query 15 schema +struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double> +-- !query 15 output +2.5 + + +-- !query 16 +select 5 / 0 +-- !query 16 schema +struct<(CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)):double> +-- !query 16 output +NULL + + +-- !query 17 +select 5 / null +-- !query 17 schema +struct<(CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)):double> +-- !query 17 output +NULL + + +-- !query 18 +select null / 5 +-- !query 18 schema +struct<(CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)):double> +-- !query 18 output +NULL + + +-- !query 19 +select 5 div 2 +-- !query 19 schema +struct<CAST((CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)) AS BIGINT):bigint> +-- !query 19 output +2 + + +-- !query 20 +select 5 div 0 +-- !query 20 schema +struct<CAST((CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)) AS BIGINT):bigint> +-- !query 20 output +NULL + + +-- !query 21 +select 5 div null +-- !query 21 schema +struct<CAST((CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)) AS BIGINT):bigint> +-- !query 21 output +NULL + + +-- !query 22 +select null div 5 +-- !query 22 schema +struct<CAST((CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)) AS BIGINT):bigint> +-- !query 22 output +NULL + + +-- !query 23 +select 1 + 2 +-- !query 23 schema +struct<(1 + 2):int> +-- !query 23 output +3 + + +-- !query 24 +select 1 - 2 +-- !query 24 schema +struct<(1 - 2):int> +-- !query 24 output +-1 + + +-- !query 25 +select 2 * 5 +-- !query 25 schema +struct<(2 * 5):int> +-- !query 25 output +10 + + +-- !query 26 +select 5 % 3 +-- !query 26 schema +struct<(5 % 3):int> +-- !query 26 output +2 + + +-- !query 27 +select pmod(-7, 3) +-- !query 27 schema +struct<pmod(-7, 3):int> +-- !query 27 output +2 + + +-- !query 28 +explain select 'a' || 1 + 2 +-- !query 28 schema +struct<plan:string> +-- !query 28 output +== Physical Plan == +*Project [null AS (CAST(concat(a, CAST(1 AS STRING)) AS DOUBLE) + CAST(2 AS DOUBLE))#x] ++- Scan OneRowRelation[] + + +-- !query 29 +explain select 1 - 2 || 'b' +-- !query 29 schema +struct<plan:string> +-- !query 29 output +== Physical Plan == +*Project [-1b AS concat(CAST((1 - 2) AS STRING), b)#x] ++- Scan OneRowRelation[] + + +-- !query 30 +explain select 2 * 4 + 3 || 'b' +-- !query 30 schema +struct<plan:string> +-- !query 30 output +== Physical Plan == +*Project [11b AS concat(CAST(((2 * 4) + 3) AS STRING), b)#x] ++- Scan OneRowRelation[] + + +-- !query 31 +explain select 3 + 1 || 'a' || 4 / 2 +-- !query 31 schema +struct<plan:string> +-- !query 31 output +== Physical Plan == +*Project [4a2.0 AS concat(concat(CAST((3 + 1) AS STRING), a), CAST((CAST(4 AS DOUBLE) / CAST(2 AS DOUBLE)) AS STRING))#x] ++- Scan OneRowRelation[] + + +-- !query 32 +explain select 1 == 1 OR 'a' || 'b' == 'ab' +-- !query 32 schema +struct<plan:string> +-- !query 32 output +== Physical Plan == +*Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x] ++- Scan OneRowRelation[] + + +-- !query 33 +explain select 'a' || 'c' == 'ac' AND 2 == 3 +-- !query 33 schema +struct<plan:string> +-- !query 33 output +== Physical Plan == +*Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x] ++- Scan OneRowRelation[] http://git-wip-us.apache.org/repos/asf/spark/blob/b526f70c/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 6961e9b..8ee0751 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 2 +-- Number of queries: 3 -- !query 0 @@ -18,3 +18,11 @@ struct<> -- !query 1 output org.apache.spark.sql.AnalysisException requirement failed: format_string() should take at least 1 argument; line 1 pos 7 + + +-- !query 2 +select 'a' || 'b' || 'c' +-- !query 2 schema +struct<concat(concat(a, b), c):string> +-- !query 2 output +abc http://git-wip-us.apache.org/repos/asf/spark/blob/b526f70c/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 908b955..b32fb90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} -import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, SortOrder} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort} @@ -290,4 +290,15 @@ class SparkSqlParserSuite extends PlanTest { basePlan, numPartitions = newConf.numShufflePartitions))) } + + test("pipeline concatenation") { + val concat = Concat( + Concat(UnresolvedAttribute("a") :: UnresolvedAttribute("b") :: Nil) :: + UnresolvedAttribute("c") :: + Nil + ) + assertEqual( + "SELECT a || b || c FROM t", + Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t")))) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
