This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new 874c404 [SPARK-33100][SQL][FOLLOWUP] Find correct bound of bracketed comment in spark-sql 874c404 is described below commit 874c40429c5f99ab02cdfd928d9d7b6caaea16ea Author: fwang12 <fwan...@ebay.com> AuthorDate: Thu Jan 7 20:49:37 2021 +0900 [SPARK-33100][SQL][FOLLOWUP] Find correct bound of bracketed comment in spark-sql ### What changes were proposed in this pull request? This PR help find correct bound of bracketed comment in spark-sql. Here is the log for UT of SPARK-33100 in CliSuite before: ``` 2021-01-05 13:22:34.768 - stdout> spark-sql> /* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.523 - stderr> Time taken: 6.716 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.599 - stdout> test 2021-01-05 13:22:41.6 - stdout> spark-sql> ;;/* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.709 - stdout> test 2021-01-05 13:22:41.709 - stdout> spark-sql> /* SELECT 'test';*/;; SELECT 'test'; 2021-01-05 13:22:41.902 - stdout> spark-sql> SELECT 'test'; -- SELECT 'test'; 2021-01-05 13:22:41.902 - stderr> Time taken: 0.129 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.902 - stderr> Error in query: 2021-01-05 13:22:41.902 - stderr> mismatched input '<EOF>' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 19) 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> == SQL == 2021-01-05 13:22:42.006 - stderr> /* SELECT 'test';*/ 2021-01-05 13:22:42.006 - stderr> -------------------^^^ 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> Time taken: 0.226 seconds, Fetched 1 row(s) 2021-01-05 13:22:42.006 - stdout> test ``` The root cause is that the insideBracketedComment is not accurate. For `/* comment */`, the last character `/` is not insideBracketedComment and it would be treat as beginning of statements. In this PR, this issue is fixed. ### Why are the changes needed? To fix the issue described above. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #31054 from turboFei/SPARK-33100-followup. Authored-by: fwang12 <fwan...@ebay.com> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> (cherry picked from commit 7b06acc28b5c37da6c48bc44c3d921309d4ad3a8) Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 24 +++++++++++++++------- .../spark/sql/hive/thriftserver/CliSuite.scala | 4 ++-- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 9155eac..8606aaa 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -530,15 +530,24 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { var bracketedCommentLevel = 0 var escape = false var beginIndex = 0 - var includingStatement = false + var leavingBracketedComment = false + var isStatement = false val ret = new JArrayList[String] def insideBracketedComment: Boolean = bracketedCommentLevel > 0 def insideComment: Boolean = insideSimpleComment || insideBracketedComment - def statementBegin(index: Int): Boolean = includingStatement || (!insideComment && + def statementInProgress(index: Int): Boolean = isStatement || (!insideComment && index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) for (index <- 0 until line.length) { + // Checks if we need to decrement a bracketed comment level; the last character '/' of + // bracketed comments is still inside the comment, so `insideBracketedComment` must keep true + // in the previous loop and we decrement the level here if needed. + if (leavingBracketedComment) { + bracketedCommentLevel -= 1 + leavingBracketedComment = false + } + if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped // See the comment above about SPARK-31595 @@ -568,12 +577,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { - if (includingStatement) { + if (isStatement) { // split, do not include ; itself ret.add(line.substring(beginIndex, index)) } beginIndex = index + 1 - includingStatement = false + isStatement = false } } else if (line.charAt(index) == '\n') { // with a new line the inline simple comment should end. @@ -585,7 +594,8 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote) { // Ignores '/' in any case of quotes } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { - bracketedCommentLevel -= 1 + // Decrements `bracketedCommentLevel` at the beginning of the next loop + leavingBracketedComment = true } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') { bracketedCommentLevel += 1 } @@ -597,9 +607,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { escape = true } - includingStatement = statementBegin(index) + isStatement = statementInProgress(index) } - if (includingStatement) { + if (isStatement) { ret.add(line.substring(beginIndex)) } ret diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6708cf9..1a96012 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -577,8 +577,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "/* SELECT 'test';*/ SELECT 'test';" -> "test", ";;/* SELECT 'test';*/ SELECT 'test';" -> "test", "/* SELECT 'test';*/;; SELECT 'test';" -> "test", - "SELECT 'test'; -- SELECT 'test';" -> "", - "SELECT 'test'; /* SELECT 'test';*/;" -> "", + "SELECT 'test'; -- SELECT 'test';" -> "test", + "SELECT 'test'; /* SELECT 'test';*/;" -> "test", "/*$meta chars{^\\;}*/ SELECT 'test';" -> "test", "/*\nmulti-line\n*/ SELECT 'test';" -> "test", "/*/* multi-level bracketed*/ SELECT 'test';" -> "test" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org