maropu commented on a change in pull request #29146:
URL: https://github.com/apache/spark/pull/29146#discussion_r461230902
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -66,17 +69,21 @@ class SparkSqlAstBuilder(conf: SQLConf) extends
AstBuilder(conf) {
* character in the raw string.
*/
override def visitSetConfiguration(ctx: SetConfigurationContext):
LogicalPlan = withOrigin(ctx) {
- // Construct the command.
- val raw = remainder(ctx.SET.getSymbol)
- val keyValueSeparatorIndex = raw.indexOf('=')
- if (keyValueSeparatorIndex >= 0) {
- val key = raw.substring(0, keyValueSeparatorIndex).trim
- val value = raw.substring(keyValueSeparatorIndex + 1).trim
- SetCommand(Some(key -> Option(value)))
- } else if (raw.nonEmpty) {
- SetCommand(Some(raw.trim -> None))
+ if (ctx.configKey() != null) {
+ val keyStr = normalizeConfigString(ctx.configKey().getText)
+ if (ctx.configValue() != null) {
+ SetCommand(Some(keyStr ->
Option(normalizeConfigString(ctx.configValue().getText))))
+ } else {
+ SetCommand(Some(keyStr -> None))
+ }
} else {
- SetCommand(None)
+ remainder(ctx.SET().getSymbol).trim match {
+ case "-v" => SetCommand(Some("-v" -> None))
+ case s if s.isEmpty() => SetCommand(None)
+ case _ => throw new ParseException("Expected format is 'SET', 'SET
key', or " +
+ "'SET key=value'. If you want to include spaces in key and value,
please use quotes, " +
+ "e.g., SET \"ke y\"=`va lu e`.", ctx)
Review comment:
Updated.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -66,17 +69,21 @@ class SparkSqlAstBuilder(conf: SQLConf) extends
AstBuilder(conf) {
* character in the raw string.
*/
override def visitSetConfiguration(ctx: SetConfigurationContext):
LogicalPlan = withOrigin(ctx) {
- // Construct the command.
- val raw = remainder(ctx.SET.getSymbol)
- val keyValueSeparatorIndex = raw.indexOf('=')
- if (keyValueSeparatorIndex >= 0) {
- val key = raw.substring(0, keyValueSeparatorIndex).trim
- val value = raw.substring(keyValueSeparatorIndex + 1).trim
- SetCommand(Some(key -> Option(value)))
- } else if (raw.nonEmpty) {
- SetCommand(Some(raw.trim -> None))
+ if (ctx.configKey() != null) {
+ val keyStr = normalizeConfigString(ctx.configKey().getText)
Review comment:
No. At least, we don't have such an existing test case.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -66,17 +69,21 @@ class SparkSqlAstBuilder(conf: SQLConf) extends
AstBuilder(conf) {
* character in the raw string.
*/
override def visitSetConfiguration(ctx: SetConfigurationContext):
LogicalPlan = withOrigin(ctx) {
- // Construct the command.
- val raw = remainder(ctx.SET.getSymbol)
- val keyValueSeparatorIndex = raw.indexOf('=')
- if (keyValueSeparatorIndex >= 0) {
- val key = raw.substring(0, keyValueSeparatorIndex).trim
- val value = raw.substring(keyValueSeparatorIndex + 1).trim
- SetCommand(Some(key -> Option(value)))
- } else if (raw.nonEmpty) {
- SetCommand(Some(raw.trim -> None))
+ if (ctx.configKey() != null) {
+ val keyStr = normalizeConfigString(ctx.configKey().getText)
+ if (ctx.configValue() != null) {
+ SetCommand(Some(keyStr ->
Option(normalizeConfigString(ctx.configValue().getText))))
+ } else {
+ SetCommand(Some(keyStr -> None))
+ }
} else {
- SetCommand(None)
+ remainder(ctx.SET().getSymbol).trim match {
+ case "-v" => SetCommand(Some("-v" -> None))
+ case s if s.isEmpty() => SetCommand(None)
+ case _ => throw new ParseException("Expected format is 'SET', 'SET
key', or " +
+ "'SET key=value'. If you want to include spaces in key and value,
please use quotes, " +
+ "e.g., SET \"ke y\"=`va lu e`.", ctx)
Review comment:
Updated.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -58,6 +58,9 @@ class SparkSqlParser(conf: SQLConf) extends
AbstractSqlParser(conf) {
class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
import org.apache.spark.sql.catalyst.parser.ParserUtils._
+ private def normalizeConfigString(s: String) =
Review comment:
I looked into `tableIdentifier`, but I couldn't find why back-quotes are
removed in that case. I replaced `BACKQUOTED_IDENTIFIER` with `quotedIdentifier`
in [this
commit](https://github.com/apache/spark/pull/29146/commits/08feee31c5d21aa75f9981efe59f583d46042cb3),
then it seems back-quotes are removed by the ANTLR parser. Do you know
something about that?
##########
File path:
sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
##########
@@ -244,11 +244,19 @@ statement
| SET TIME ZONE interval
#setTimeZone
| SET TIME ZONE timezone=(STRING | LOCAL)
#setTimeZone
| SET TIME ZONE .*?
#setTimeZone
+ | SET configKey (EQ value=.+)?
#setConfiguration
| SET .*?
#setConfiguration
+ | RESET configKey?
#resetConfiguration
| RESET .*?
#resetConfiguration
| unsupportedHiveNativeCommands .*?
#failNativeCommand
;
+configKey
+ : IDENTIFIER (('.' | ':') IDENTIFIER)*
Review comment:
Currently, this rule leads to some test failures;
```
sbt.ForkMain$ForkError: org.apache.spark.sql.catalyst.parser.ParseException:
Expected format is 'SET', 'SET key', or 'SET key=value'. If you want to
include special characters in key and value, please use quotes or string
literal, e.g., SET "ke y"=value.(line 1, pos 0)
== SQL ==
set hive.fetch.task.conversion=more
^^^
```
https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126674/testReport/org.apache.spark.sql.hive.execution/HiveCompatibilitySuite/binary_constant/
This is because `IDENTIFIER` seems to conflict with an existing ANTLR
token, e.g., `fetch` in the example above. I'm currently looking for another
smart rule to avoid it. Please let me know if someone has a better idea.
##########
File path:
sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
##########
@@ -244,11 +244,19 @@ statement
| SET TIME ZONE interval
#setTimeZone
| SET TIME ZONE timezone=(STRING | LOCAL)
#setTimeZone
| SET TIME ZONE .*?
#setTimeZone
+ | SET configKey (EQ value=.+)?
#setConfiguration
| SET .*?
#setConfiguration
+ | RESET configKey?
#resetConfiguration
| RESET .*?
#resetConfiguration
| unsupportedHiveNativeCommands .*?
#failNativeCommand
;
+configKey
+ : IDENTIFIER (('.' | ':') IDENTIFIER)*
+ | quotedIdentifier
+ | STRING
Review comment:
ok, I also think the simpler rule looks better. I'll update it based on
your suggestion.
##########
File path:
sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
##########
@@ -244,11 +244,19 @@ statement
| SET TIME ZONE interval
#setTimeZone
| SET TIME ZONE timezone=(STRING | LOCAL)
#setTimeZone
| SET TIME ZONE .*?
#setTimeZone
+ | SET configKey (EQ value=.+)?
#setConfiguration
| SET .*?
#setConfiguration
+ | RESET configKey?
#resetConfiguration
| RESET .*?
#resetConfiguration
| unsupportedHiveNativeCommands .*?
#failNativeCommand
;
+configKey
+ : IDENTIFIER (('.' | ':') IDENTIFIER)*
+ | quotedIdentifier
+ | STRING
Review comment:
ok, I think the simpler rule looks better. I'll update it based on your
suggestion.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -58,6 +58,9 @@ class SparkSqlParser(conf: SQLConf) extends
AbstractSqlParser(conf) {
class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
import org.apache.spark.sql.catalyst.parser.ParserUtils._
+ private def normalizeConfigString(s: String) =
Review comment:
Oh, nice. I see...
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
##########
@@ -61,6 +61,39 @@ class SparkSqlParserSuite extends AnalysisTest {
private def intercept(sqlCommand: String, messages: String*): Unit =
interceptParseException(parser.parsePlan)(sqlCommand, messages: _*)
+ test("Report Error for invalid usage of SET command") {
+ assertEqual("SET", SetCommand(None))
+ assertEqual("SET -v", SetCommand(Some("-v", None)))
+ assertEqual("SET spark.sql.key", SetCommand(Some("spark.sql.key" -> None)))
+ assertEqual("SET spark:sql:key=false", SetCommand(Some("spark:sql:key" ->
Some("false"))))
+ assertEqual("SET spark.sql. key=value",
Review comment:
hm, I'll try. But, it seems we already accept similar patterns, e.g.,
```
scala> sql("select * from default.t").show()
+---+
| a|
+---+
| 4|
| 5|
| 1|
| 2|
| 3|
+---+
scala> sql("select * from default .t").show()
+---+
| a|
+---+
| 4|
| 5|
| 1|
| 2|
| 3|
+---+
scala> sql("select * from default . t").show()
+---+
| a|
+---+
| 4|
| 5|
| 1|
| 2|
| 3|
+---+
```
Should these also be prohibited? (But I think it would be hard to do so...)
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
##########
@@ -61,6 +61,39 @@ class SparkSqlParserSuite extends AnalysisTest {
private def intercept(sqlCommand: String, messages: String*): Unit =
interceptParseException(parser.parsePlan)(sqlCommand, messages: _*)
+ test("Report Error for invalid usage of SET command") {
+ assertEqual("SET", SetCommand(None))
+ assertEqual("SET -v", SetCommand(Some("-v", None)))
+ assertEqual("SET spark.sql.key", SetCommand(Some("spark.sql.key" -> None)))
+ assertEqual("SET spark:sql:key=false", SetCommand(Some("spark:sql:key" ->
Some("false"))))
+ assertEqual("SET spark.sql. key=value",
Review comment:
hm, I'll check it. But, it seems we already accept similar patterns,
e.g.,
```
scala> sql("select * from default.t").show()
+---+
| a|
+---+
| 4|
| 5|
| 1|
| 2|
| 3|
+---+
scala> sql("select * from default .t").show()
+---+
| a|
+---+
| 4|
| 5|
| 1|
| 2|
| 3|
+---+
scala> sql("select * from default . t").show()
+---+
| a|
+---+
| 4|
| 5|
| 1|
| 2|
| 3|
+---+
```
Should these also be prohibited? (But I think it would be hard to do so...)
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]