This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new 61074aa [SPARK-37389][SQL][3.1] Check unclosed bracketed comments 61074aa is described below commit 61074aa1c17ffca1294215c5516a76799c9eeed5 Author: Jiaan Geng <belie...@163.com> AuthorDate: Thu Nov 25 15:28:14 2021 +0800 [SPARK-37389][SQL][3.1] Check unclosed bracketed comments ### What changes were proposed in this pull request? This PR used to backport https://github.com/apache/spark/pull/34668 to branch 3.1 ### Why are the changes needed? The execute plan is not expected, if we don't check unclosed bracketed comments. ### Does this PR introduce _any_ user-facing change? 'Yes'. The behavior of bracketed comments will more correctly. ### How was this patch tested? New tests. Closes #34696 from beliefer/SPARK-37389-backport-3.1. Authored-by: Jiaan Geng <belie...@163.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 17 +++++- .../spark/sql/catalyst/parser/ParseDriver.scala | 55 +++++++++++++++++++ .../sql/catalyst/parser/PlanParserSuite.scala | 50 +++++++++++++++++ .../test/resources/sql-tests/inputs/comments.sql | 29 ++++++++++ .../resources/sql-tests/results/comments.sql.out | 64 +++++++++++++++++++++- 5 files changed, 213 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index a23994f..7463ce2 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -37,6 +37,11 @@ grammar SqlBase; @lexer::members { /** + * When true, parser should throw ParseExcetion for unclosed bracketed comment. + */ + public boolean has_unclosed_bracketed_comment = false; + + /** * Verify whether current token is a valid decimal token (which contains dot). * Returns true if the character that follows the token is not a digit or letter or underscore. * @@ -73,6 +78,16 @@ grammar SqlBase; return false; } } + + /** + * This method will be called when the character stream ends and try to find out the + * unclosed bracketed comment. + * If the method be called, it means the end of the entire character stream match, + * and we set the flag and fail later. + */ + public void markUnclosedComment() { + has_unclosed_bracketed_comment = true; + } } singleStatement @@ -1821,7 +1836,7 @@ SIMPLE_COMMENT ; BRACKETED_COMMENT - : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) + : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN) ; WS diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index d08be46..dc3c0cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -94,6 +94,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with val tokenStream = new CommonTokenStream(lexer) val parser = new SqlBaseParser(tokenStream) parser.addParseListener(PostProcessor) + parser.addParseListener(UnclosedCommentProcessor(command, tokenStream)) parser.removeErrorListeners() parser.addErrorListener(ParseErrorListener) parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced @@ -299,3 +300,57 @@ case object PostProcessor extends SqlBaseBaseListener { parent.addChild(new TerminalNodeImpl(f(newToken))) } } + +/** + * The post-processor checks the unclosed bracketed comment. + */ +case class UnclosedCommentProcessor( + command: String, tokenStream: CommonTokenStream) extends SqlBaseBaseListener { + + override def exitSingleDataType(ctx: SqlBaseParser.SingleDataTypeContext): Unit = { + checkUnclosedComment(tokenStream, command) + } + + override def exitSingleExpression(ctx: SqlBaseParser.SingleExpressionContext): Unit = { + checkUnclosedComment(tokenStream, command) + } + + override def exitSingleTableIdentifier(ctx: SqlBaseParser.SingleTableIdentifierContext): Unit = { + checkUnclosedComment(tokenStream, command) + } + + override def exitSingleFunctionIdentifier( + ctx: SqlBaseParser.SingleFunctionIdentifierContext): Unit = { + checkUnclosedComment(tokenStream, command) + } + + override def exitSingleMultipartIdentifier( + ctx: SqlBaseParser.SingleMultipartIdentifierContext): Unit = { + checkUnclosedComment(tokenStream, command) + } + + override def exitSingleTableSchema(ctx: SqlBaseParser.SingleTableSchemaContext): Unit = { + checkUnclosedComment(tokenStream, command) + } + + override def exitQuery(ctx: SqlBaseParser.QueryContext): Unit = { + checkUnclosedComment(tokenStream, command) + } + + override def exitSingleStatement(ctx: SqlBaseParser.SingleStatementContext): Unit = { + checkUnclosedComment(tokenStream, command) + } + + /** check `has_unclosed_bracketed_comment` to find out the unclosed bracketed comment. */ + private def checkUnclosedComment(tokenStream: CommonTokenStream, command: String) = { + assert(tokenStream.getTokenSource.isInstanceOf[SqlBaseLexer]) + val lexer = tokenStream.getTokenSource.asInstanceOf[SqlBaseLexer] + if (lexer.has_unclosed_bracketed_comment) { + // The last token is 'EOF' and the penultimate is unclosed bracketed comment + val failedToken = tokenStream.get(tokenStream.size() - 2) + assert(failedToken.getType() == SqlBaseParser.BRACKETED_COMMENT) + val position = Origin(Option(failedToken.getLine), Option(failedToken.getCharPositionInLine)) + throw new ParseException(Some(command), "Unclosed bracketed comment", position, position) + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 6fef18b..f152c15 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -158,6 +158,56 @@ class PlanParserSuite extends AnalysisTest { """.stripMargin, plan) } + test("nested bracketed comment case seven") { + val plan = OneRowRelation().select(Literal(1).as("a")) + assertEqual( + """ + |/*abc*/ + |select 1 as a + |/* + | + |2 as b + |/*abc */ + |, 3 as c + | + |/**/ + |*/ + """.stripMargin, plan) + } + + test("unclosed bracketed comment one") { + val query = """ + |/*abc*/ + |select 1 as a + |/* + | + |2 as b + |/*abc */ + |, 3 as c + | + |/**/ + |""".stripMargin + val e = intercept[ParseException](parsePlan(query)) + assert(e.getMessage.contains(s"Unclosed bracketed comment")) + } + + test("unclosed bracketed comment two") { + val query = """ + |/*abc*/ + |select 1 as a + |/* + | + |2 as b + |/*abc */ + |, 3 as c + | + |/**/ + |select 4 as d + |""".stripMargin + val e = intercept[ParseException](parsePlan(query)) + assert(e.getMessage.contains(s"Unclosed bracketed comment")) + } + test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) diff --git a/sql/core/src/test/resources/sql-tests/inputs/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/comments.sql index 19f11de..da5e57a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/comments.sql @@ -88,3 +88,32 @@ Other information of first level. /*/**/*/ SELECT 'selected content' AS tenth; --QUERY-DELIMITER-END + +-- the first case of unclosed bracketed comment +--QUERY-DELIMITER-START +/*abc*/ +select 1 as a +/* + +2 as b +/*abc*/ +, 3 as c + +/**/ +; +--QUERY-DELIMITER-END + +-- the second case of unclosed bracketed comment +--QUERY-DELIMITER-START +/*abc*/ +select 1 as a +/* + +2 as b +/*abc*/ +, 3 as c + +/**/ +select 4 as d +; +--QUERY-DELIMITER-END diff --git a/sql/core/src/test/resources/sql-tests/results/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/comments.sql.out index fd58a33..da9dbd5 100644 --- a/sql/core/src/test/resources/sql-tests/results/comments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/comments.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 10 +-- Number of queries: 12 -- !query @@ -119,3 +119,65 @@ SELECT 'selected content' AS tenth struct<tenth:string> -- !query output selected content + + +-- !query +/*abc*/ +select 1 as a +/* + +2 as b +/*abc*/ +, 3 as c + +/**/ +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Unclosed bracketed comment(line 3, pos 0) + +== SQL == +/*abc*/ +select 1 as a +/* +^^^ + +2 as b +/*abc*/ +, 3 as c + +/**/ + + +-- !query +/*abc*/ +select 1 as a +/* + +2 as b +/*abc*/ +, 3 as c + +/**/ +select 4 as d +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Unclosed bracketed comment(line 3, pos 0) + +== SQL == +/*abc*/ +select 1 as a +/* +^^^ + +2 as b +/*abc*/ +, 3 as c + +/**/ +select 4 as d --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org