Repository: spark
Updated Branches:
refs/heads/master eb7a5a66b -> 17bdc36ef
[SPARK-21102][SQL] Refresh command is too aggressive in parsing
### Idea
This PR adds validation to REFRESH sql statements. Currently, users can specify
whatever they want as resource path. For example, spark.sql("REFRESH ! $ !")
will be executed without any exceptions.
### Implementation
I am not sure that my current implementation is the most optimal, so any
feedback is appreciated. My first idea was to make the grammar as strict as
possible. Unfortunately, there were some problems. I tried the approach below:
SqlBase.g4
```
...
| REFRESH TABLE tableIdentifier
#refreshTable
| REFRESH resourcePath
#refreshResource
...
resourcePath
: STRING
| (IDENTIFIER | number | nonReserved | '/' | '-')+ // other symbols can be
added if needed
;
```
It is not flexible enough and requires to explicitly mention all possible
symbols. Therefore, I came up with the current approach that is implemented in
the code.
Let me know your opinion on which one is better.
Author: aokolnychyi <[email protected]>
Closes #18368 from aokolnychyi/spark-21102.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/17bdc36e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/17bdc36e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/17bdc36e
Branch: refs/heads/master
Commit: 17bdc36ef16a544b693c628db276fe32db87fe7a
Parents: eb7a5a6
Author: aokolnychyi <[email protected]>
Authored: Mon Jul 3 09:35:49 2017 -0700
Committer: gatorsmile <[email protected]>
Committed: Mon Jul 3 09:35:49 2017 -0700
----------------------------------------------------------------------
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +-
.../spark/sql/execution/SparkSqlParser.scala | 20 ++++++++++++++++---
.../sql/execution/SparkSqlParserSuite.scala | 21 +++++++++++++++++++-
3 files changed, 38 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/17bdc36e/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 7ffa150..29f5544 100644
---
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -149,7 +149,7 @@ statement
| (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)?
tableIdentifier partitionSpec? describeColName?
#describeTable
| REFRESH TABLE tableIdentifier
#refreshTable
- | REFRESH .*?
#refreshResource
+ | REFRESH (STRING | .*?)
#refreshResource
| CACHE LAZY? TABLE tableIdentifier (AS? query)?
#cacheTable
| UNCACHE TABLE (IF EXISTS)? tableIdentifier
#uncacheTable
| CLEAR CACHE
#clearCache
http://git-wip-us.apache.org/repos/asf/spark/blob/17bdc36e/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 3c58c6e..2b79eb5 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -230,11 +230,25 @@ class SparkSqlAstBuilder(conf: SQLConf) extends
AstBuilder(conf) {
}
/**
- * Create a [[RefreshTable]] logical plan.
+ * Create a [[RefreshResource]] logical plan.
*/
override def visitRefreshResource(ctx: RefreshResourceContext): LogicalPlan
= withOrigin(ctx) {
- val resourcePath = remainder(ctx.REFRESH.getSymbol).trim
- RefreshResource(resourcePath)
+ val path = if (ctx.STRING != null) string(ctx.STRING) else
extractUnquotedResourcePath(ctx)
+ RefreshResource(path)
+ }
+
+ private def extractUnquotedResourcePath(ctx: RefreshResourceContext): String
= withOrigin(ctx) {
+ val unquotedPath = remainder(ctx.REFRESH.getSymbol).trim
+ validate(
+ unquotedPath != null && !unquotedPath.isEmpty,
+ "Resource paths cannot be empty in REFRESH statements. Use / to match
everything",
+ ctx)
+ val forbiddenSymbols = Seq(" ", "\n", "\r", "\t")
+ validate(
+ !forbiddenSymbols.exists(unquotedPath.contains(_)),
+ "REFRESH statements cannot contain ' ', '\\n', '\\r', '\\t' inside
unquoted resource paths",
+ ctx)
+ unquotedPath
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/17bdc36e/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
index bd9c2eb..d238c76 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.{Ascending,
Concat, SortOrder}
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project,
RepartitionByExpression, Sort}
import org.apache.spark.sql.execution.command._
-import org.apache.spark.sql.execution.datasources.CreateTable
+import org.apache.spark.sql.execution.datasources.{CreateTable,
RefreshResource}
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
import org.apache.spark.sql.types.{IntegerType, LongType, StringType,
StructType}
@@ -66,6 +66,25 @@ class SparkSqlParserSuite extends AnalysisTest {
}
}
+ test("refresh resource") {
+ assertEqual("REFRESH prefix_path", RefreshResource("prefix_path"))
+ assertEqual("REFRESH /", RefreshResource("/"))
+ assertEqual("REFRESH /path///a", RefreshResource("/path///a"))
+ assertEqual("REFRESH pat1h/112/_1a", RefreshResource("pat1h/112/_1a"))
+ assertEqual("REFRESH pat1h/112/_1a/a-1",
RefreshResource("pat1h/112/_1a/a-1"))
+ assertEqual("REFRESH path-with-dash", RefreshResource("path-with-dash"))
+ assertEqual("REFRESH \'path with space\'", RefreshResource("path with
space"))
+ assertEqual("REFRESH \"path with space 2\"", RefreshResource("path with
space 2"))
+ intercept("REFRESH a b", "REFRESH statements cannot contain")
+ intercept("REFRESH a\tb", "REFRESH statements cannot contain")
+ intercept("REFRESH a\nb", "REFRESH statements cannot contain")
+ intercept("REFRESH a\rb", "REFRESH statements cannot contain")
+ intercept("REFRESH a\r\nb", "REFRESH statements cannot contain")
+ intercept("REFRESH @ $a$", "REFRESH statements cannot contain")
+ intercept("REFRESH ", "Resource paths cannot be empty in REFRESH
statements")
+ intercept("REFRESH", "Resource paths cannot be empty in REFRESH
statements")
+ }
+
test("show functions") {
assertEqual("show functions", ShowFunctionsCommand(None, None, true, true))
assertEqual("show all functions", ShowFunctionsCommand(None, None, true,
true))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]