This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new ad5ac3a [SPARK-37389][SQL][FOLLOWUP] SET command shuold not parse comments ad5ac3a is described below commit ad5ac3a22337b04fdd3413d148873f0d1077a0ca Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Wed Dec 1 16:33:52 2021 +0800 [SPARK-37389][SQL][FOLLOWUP] SET command shuold not parse comments ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/34668 , to fix a breaking change. The SET command uses wildcard which may contain unclosed comment, e.g. `/path/to/*`, and we shouldn't fail it. This PR fixes it by skipping the unclosed comment check if we are parsing SET command. ### Why are the changes needed? fix a breaking change ### Does this PR introduce _any_ user-facing change? no, the breaking change is not released yet. ### How was this patch tested? new tests Closes #34763 from cloud-fan/set. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit eaa135870a30fb89c2f1087991328a6f72a1860c) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../spark/sql/catalyst/parser/ParseDriver.scala | 6 ++- .../spark/sql/errors/QueryParsingErrors.scala | 6 +-- .../spark/sql/execution/SparkSqlParser.scala | 49 ++++++++++++---------- .../spark/sql/execution/SparkSqlParserSuite.scala | 9 ++++ 5 files changed, 44 insertions(+), 28 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 1aa89ac..319536c 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -254,7 +254,7 @@ statement | SET TIME ZONE timezone=(STRING | LOCAL) #setTimeZone | SET TIME ZONE .*? #setTimeZone | SET configKey EQ configValue #setQuotedConfiguration - | SET configKey (EQ .*?)? #setQuotedConfiguration + | SET configKey (EQ .*?)? #setConfiguration | SET .*? EQ configValue #setQuotedConfiguration | SET .*? #setConfiguration | RESET configKey #resetQuotedConfiguration diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 7dcf41b..c6633b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -347,7 +347,11 @@ case class UnclosedCommentProcessor( } override def exitSingleStatement(ctx: SqlBaseParser.SingleStatementContext): Unit = { - checkUnclosedComment(tokenStream, command) + // SET command uses a wildcard to match anything, and we shouldn't parse the comments, e.g. + // `SET myPath =/a/*`. + if (!ctx.statement().isInstanceOf[SqlBaseParser.SetConfigurationContext]) { + checkUnclosedComment(tokenStream, command) + } } /** check `has_unclosed_bracketed_comment` to find out the unclosed bracketed comment. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 86fd41f..b6a8163 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -328,7 +328,7 @@ object QueryParsingErrors { new ParseException(errorClass = "DUPLICATE_KEY", messageParameters = Array(key), ctx) } - def unexpectedFomatForSetConfigurationError(ctx: SetConfigurationContext): Throwable = { + def unexpectedFomatForSetConfigurationError(ctx: ParserRuleContext): Throwable = { new ParseException( s""" |Expected format is 'SET', 'SET key', or 'SET key=value'. If you want to include @@ -338,13 +338,13 @@ object QueryParsingErrors { } def invalidPropertyKeyForSetQuotedConfigurationError( - keyCandidate: String, valueStr: String, ctx: SetQuotedConfigurationContext): Throwable = { + keyCandidate: String, valueStr: String, ctx: ParserRuleContext): Throwable = { new ParseException(s"'$keyCandidate' is an invalid property key, please " + s"use quotes, e.g. SET `$keyCandidate`=`$valueStr`", ctx) } def invalidPropertyValueForSetQuotedConfigurationError( - valueCandidate: String, keyStr: String, ctx: SetQuotedConfigurationContext): Throwable = { + valueCandidate: String, keyStr: String, ctx: ParserRuleContext): Throwable = { new ParseException(s"'$valueCandidate' is an invalid property value, please " + s"use quotes, e.g. SET `$keyStr`=`$valueCandidate`", ctx) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index ea74ce6..dc5d865 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -71,24 +71,38 @@ class SparkSqlAstBuilder extends AstBuilder { * character in the raw string. */ override def visitSetConfiguration(ctx: SetConfigurationContext): LogicalPlan = withOrigin(ctx) { - remainder(ctx.SET.getSymbol).trim match { - case configKeyValueDef(key, value) => - SetCommand(Some(key -> Option(value.trim))) - case configKeyDef(key) => - SetCommand(Some(key -> None)) - case s if s == "-v" => - SetCommand(Some("-v" -> None)) - case s if s.isEmpty => - SetCommand(None) - case _ => throw QueryParsingErrors.unexpectedFomatForSetConfigurationError(ctx) + if (ctx.configKey() != null) { + val keyStr = ctx.configKey().getText + if (ctx.EQ() != null) { + remainder(ctx.EQ().getSymbol).trim match { + case configValueDef(valueStr) => SetCommand(Some(keyStr -> Option(valueStr))) + case other => throw QueryParsingErrors.invalidPropertyValueForSetQuotedConfigurationError( + other, keyStr, ctx) + } + } else { + SetCommand(Some(keyStr -> None)) + } + } else { + remainder(ctx.SET.getSymbol).trim match { + case configKeyValueDef(key, value) => + SetCommand(Some(key -> Option(value.trim))) + case configKeyDef(key) => + SetCommand(Some(key -> None)) + case s if s == "-v" => + SetCommand(Some("-v" -> None)) + case s if s.isEmpty => + SetCommand(None) + case _ => throw QueryParsingErrors.unexpectedFomatForSetConfigurationError(ctx) + } } } override def visitSetQuotedConfiguration( ctx: SetQuotedConfigurationContext): LogicalPlan = withOrigin(ctx) { - if (ctx.configValue() != null && ctx.configKey() != null) { + assert(ctx.configValue() != null) + if (ctx.configKey() != null) { SetCommand(Some(ctx.configKey().getText -> Option(ctx.configValue().getText))) - } else if (ctx.configValue() != null) { + } else { val valueStr = ctx.configValue().getText val keyCandidate = interval(ctx.SET().getSymbol, ctx.EQ().getSymbol).trim keyCandidate match { @@ -96,17 +110,6 @@ class SparkSqlAstBuilder extends AstBuilder { case _ => throw QueryParsingErrors.invalidPropertyKeyForSetQuotedConfigurationError( keyCandidate, valueStr, ctx) } - } else { - val keyStr = ctx.configKey().getText - if (ctx.EQ() != null) { - remainder(ctx.EQ().getSymbol).trim match { - case configValueDef(valueStr) => SetCommand(Some(keyStr -> Option(valueStr))) - case other => throw QueryParsingErrors.invalidPropertyValueForSetQuotedConfigurationError( - other, keyStr, ctx) - } - } else { - SetCommand(Some(keyStr -> None)) - } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index e7d630d..ba6dd17 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedHaving, UnresolvedRelation, UnresolvedStar} import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, Concat, GreaterThan, Literal, NullsFirst, SortOrder, UnresolvedWindowExpression, UnspecifiedFrame, WindowSpecDefinition, WindowSpecReference} +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.connector.catalog.TableCatalog import org.apache.spark.sql.execution.command._ @@ -73,6 +74,14 @@ class SparkSqlParserSuite extends AnalysisTest { } } + test("SET with comment") { + assertEqual(s"SET my_path = /a/b/*", SetCommand(Some("my_path" -> Some("/a/b/*")))) + val e1 = intercept[ParseException](parser.parsePlan("SET k=`v` /*")) + assert(e1.getMessage.contains(s"Unclosed bracketed comment")) + val e2 = intercept[ParseException](parser.parsePlan("SET `k`=`v` /*")) + assert(e2.getMessage.contains(s"Unclosed bracketed comment")) + } + test("Report Error for invalid usage of SET command") { assertEqual("SET", SetCommand(None)) assertEqual("SET -v", SetCommand(Some("-v", None))) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org