Repository: spark Updated Branches: refs/heads/branch-2.3 25ea27b09 -> fc3df4517
[SPARK-24536] Validate that an evaluated limit clause cannot be null It proposes a version in which nullable expressions are not valid in the limit clause It was tested with unit and e2e tests. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Mauro Palsgraaf <mauropalsgr...@hotmail.com> Closes #21807 from mauropalsgraaf/SPARK-24536. (cherry picked from commit 4ac2126bc64bad1b4cbe1c697b4bcafacd67c96c) Signed-off-by: Xiao Li <gatorsm...@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc3df451 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc3df451 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc3df451 Branch: refs/heads/branch-2.3 Commit: fc3df45177d176cc0fe43049b6f8df372f7ea0e0 Parents: 25ea27b Author: Mauro Palsgraaf <mauropalsgr...@hotmail.com> Authored: Tue Jul 31 08:18:08 2018 -0700 Committer: Xiao Li <gatorsm...@gmail.com> Committed: Tue Jul 31 08:22:25 2018 -0700 ---------------------------------------------------------------------- .../sql/catalyst/analysis/CheckAnalysis.scala | 14 +++--- .../catalyst/analysis/AnalysisErrorSuite.scala | 6 +++ .../test/resources/sql-tests/inputs/limit.sql | 5 +++ .../resources/sql-tests/results/limit.sql.out | 45 ++++++++++++++------ 4 files changed, 51 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/fc3df451/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 0d189b4..beb11d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -66,11 +66,15 @@ trait CheckAnalysis extends PredicateHelper { limitExpr.sql) case e if e.dataType != IntegerType => failAnalysis( s"The limit expression must be integer type, but got " + - e.dataType.simpleString) - case e if e.eval().asInstanceOf[Int] < 0 => failAnalysis( - "The limit expression must be equal to or greater than 0, but got " + - e.eval().asInstanceOf[Int]) - case e => // OK + e.dataType.catalogString) + case e => + e.eval() match { + case null => failAnalysis( + s"The evaluated limit expression must not be null, but got ${limitExpr.sql}") + case v: Int if v < 0 => failAnalysis( + s"The limit expression must be equal to or greater than 0, but got $v") + case _ => // OK + } } } http://git-wip-us.apache.org/repos/asf/spark/blob/fc3df451/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 5d2f8e7..70325b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -393,6 +393,12 @@ class AnalysisErrorSuite extends AnalysisTest { ) errorTest( + "an evaluated limit class must not be null", + testRelation.limit(Literal(null, IntegerType)), + "The evaluated limit expression must not be null, but got " :: Nil + ) + + errorTest( "num_rows in limit clause must be equal to or greater than 0", listRelation.limit(-1), "The limit expression must be equal to or greater than 0, but got -1" :: Nil http://git-wip-us.apache.org/repos/asf/spark/blob/fc3df451/sql/core/src/test/resources/sql-tests/inputs/limit.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql index f21912a..b4c73cf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql @@ -13,6 +13,11 @@ SELECT * FROM testdata LIMIT CAST(1 AS int); SELECT * FROM testdata LIMIT -1; SELECT * FROM testData TABLESAMPLE (-1 ROWS); + +SELECT * FROM testdata LIMIT CAST(1 AS INT); +-- evaluated limit must not be null +SELECT * FROM testdata LIMIT CAST(NULL AS INT); + -- limit must be foldable SELECT * FROM testdata LIMIT key > 3; http://git-wip-us.apache.org/repos/asf/spark/blob/fc3df451/sql/core/src/test/resources/sql-tests/results/limit.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index 146abe6..02fe1de 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 12 +-- Number of queries: 14 -- !query 0 @@ -66,44 +66,61 @@ The limit expression must be equal to or greater than 0, but got -1; -- !query 7 -SELECT * FROM testdata LIMIT key > 3 +SELECT * FROM testdata LIMIT CAST(1 AS INT) -- !query 7 schema -struct<> +struct<key:int,value:string> -- !query 7 output -org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); +1 1 -- !query 8 -SELECT * FROM testdata LIMIT true +SELECT * FROM testdata LIMIT CAST(NULL AS INT) -- !query 8 schema struct<> -- !query 8 output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got boolean; +The evaluated limit expression must not be null, but got CAST(NULL AS INT); -- !query 9 -SELECT * FROM testdata LIMIT 'a' +SELECT * FROM testdata LIMIT key > 3 -- !query 9 schema struct<> -- !query 9 output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got string; +The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); -- !query 10 -SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3 +SELECT * FROM testdata LIMIT true -- !query 10 schema -struct<id:bigint> +struct<> -- !query 10 output -4 +org.apache.spark.sql.AnalysisException +The limit expression must be integer type, but got boolean; -- !query 11 -SELECT * FROM testdata WHERE key < 3 LIMIT ALL +SELECT * FROM testdata LIMIT 'a' -- !query 11 schema -struct<key:int,value:string> +struct<> -- !query 11 output +org.apache.spark.sql.AnalysisException +The limit expression must be integer type, but got string; + + +-- !query 12 +SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3 +-- !query 12 schema +struct<id:bigint> +-- !query 12 output +4 + + +-- !query 13 +SELECT * FROM testdata WHERE key < 3 LIMIT ALL +-- !query 13 schema +struct<key:int,value:string> +-- !query 13 output 1 1 2 2 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org