[spark] branch master updated: [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on

yamamuro Tue, 12 Mar 2019 19:21:23 -0700

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 1e9469b  [SPARK-26976][SQL] Forbid reserved keywords as identifiers 
when ANSI mode is on
1e9469b is described below

commit 1e9469bb7a71b06d610edaaebca933f4219a6eb3
Author: Takeshi Yamamuro <[email protected]>
AuthorDate: Wed Mar 13 11:20:27 2019 +0900

    [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode 
is on
    
    ## What changes were proposed in this pull request?
    This PR adds code to forbid reserved keywords as identifiers when ANSI 
mode is on.
    This is a follow-up to SPARK-26215 (#23259).
    
    ## How was this patch tested?
    Added tests in `TableIdentifierParserSuite`.
    
    Closes #23880 from maropu/SPARK-26976.
    
    Authored-by: Takeshi Yamamuro <[email protected]>
    Signed-off-by: Takeshi Yamamuro <[email protected]>
---
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    |  84 ---
 .../parser/TableIdentifierParserSuite.scala        | 650 ++++++++++++++++++++-
 2 files changed, 649 insertions(+), 85 deletions(-)

diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index c61cda8..d11c28c 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -736,7 +736,6 @@ qualifiedName
 
 identifier
     : strictIdentifier
-    | {ansi}? ansiReserved
     | {!ansi}? defaultReserved
     ;
 
@@ -761,89 +760,6 @@ number
     | MINUS? BIGDECIMAL_LITERAL       #bigDecimalLiteral
     ;
 
-// NOTE: You must follow a rule below when you add a new ANTLR token in this 
file:
-//  - All the ANTLR tokens = UNION(`ansiReserved`, `ansiNonReserved`) = 
UNION(`defaultReserved`, `nonReserved`)
-//
-// Let's say you add a new token `NEWTOKEN` and this is not reserved 
regardless of a `spark.sql.parser.ansi.enabled`
-// value. In this case, you must add a token `NEWTOKEN` in both 
`ansiNonReserved` and `nonReserved`.
-//
-// It is recommended to list them in alphabetical order.
-
-// The list of the reserved keywords when `spark.sql.parser.ansi.enabled` is 
true. Currently, we only reserve
-// the ANSI keywords that almost all the ANSI SQL standards (SQL-92, SQL-99, 
SQL-2003, SQL-2008, SQL-2011,
-// and SQL-2016) and PostgreSQL reserve.
-ansiReserved
-    : ALL
-    | AND
-    | ANTI
-    | ANY
-    | AS
-    | AUTHORIZATION
-    | BOTH
-    | CASE
-    | CAST
-    | CHECK
-    | COLLATE
-    | COLUMN
-    | CONSTRAINT
-    | CREATE
-    | CROSS
-    | CURRENT_DATE
-    | CURRENT_TIME
-    | CURRENT_TIMESTAMP
-    | CURRENT_USER
-    | DISTINCT
-    | ELSE
-    | END
-    | EXCEPT
-    | FALSE
-    | FETCH
-    | FOR
-    | FOREIGN
-    | FROM
-    | FULL
-    | GRANT
-    | GROUP
-    | HAVING
-    | IN
-    | INNER
-    | INTERSECT
-    | INTO
-    | IS
-    | JOIN
-    | LEADING
-    | LEFT
-    | NATURAL
-    | NOT
-    | NULL
-    | ON
-    | ONLY
-    | OR
-    | ORDER
-    | OUTER
-    | OVERLAPS
-    | PRIMARY
-    | REFERENCES
-    | RIGHT
-    | SELECT
-    | SEMI
-    | SESSION_USER
-    | SETMINUS
-    | SOME
-    | TABLE
-    | THEN
-    | TO
-    | TRAILING
-    | UNION
-    | UNIQUE
-    | USER
-    | USING
-    | WHEN
-    | WHERE
-    | WITH
-    ;
-
-
 // The list of the non-reserved keywords when `spark.sql.parser.ansi.enabled` 
is true.
 ansiNonReserved
     : ADD
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index 3d41c27..2725deb 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -18,8 +18,10 @@ package org.apache.spark.sql.catalyst.parser
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.internal.SQLConf
 
-class TableIdentifierParserSuite extends SparkFunSuite {
+class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
   import CatalystSqlParser._
 
   // Add "$elem$", "$value$" & "$key$"
@@ -281,6 +283,635 @@ class TableIdentifierParserSuite extends SparkFunSuite {
     "where",
     "with")
 
+  // All the keywords in `docs/sql-reserved-and-non-reserved-key-words.md` are 
listed below:
+  val allCandidateKeywords = Set(
+    "abs",
+    "absolute",
+    "acos",
+    "action",
+    "add",
+    "after",
+    "all",
+    "allocate",
+    "alter",
+    "analyze",
+    "and",
+    "anti",
+    "any",
+    "archive",
+    "are",
+    "array",
+    "array_agg",
+    "array_max_cardinality",
+    "as",
+    "asc",
+    "asensitive",
+    "asin",
+    "assertion",
+    "asymmetric",
+    "at",
+    "atan",
+    "atomic",
+    "authorization",
+    "avg",
+    "before",
+    "begin",
+    "begin_frame",
+    "begin_partition",
+    "between",
+    "bigint",
+    "binary",
+    "bit",
+    "bit_length",
+    "blob",
+    "boolean",
+    "both",
+    "breadth",
+    "bucket",
+    "buckets",
+    "by",
+    "cache",
+    "call",
+    "called",
+    "cardinality",
+    "cascade",
+    "cascaded",
+    "case",
+    "cast",
+    "catalog",
+    "ceil",
+    "ceiling",
+    "change",
+    "char",
+    "char_length",
+    "character",
+    "character_length",
+    "check",
+    "classifier",
+    "clear",
+    "clob",
+    "close",
+    "cluster",
+    "clustered",
+    "coalesce",
+    "codegen",
+    "collate",
+    "collation",
+    "collect",
+    "collection",
+    "column",
+    "columns",
+    "comment",
+    "commit",
+    "compact",
+    "compactions",
+    "compute",
+    "concatenate",
+    "condition",
+    "connect",
+    "connection",
+    "constraint",
+    "constraints",
+    "constructor",
+    "contains",
+    "continue",
+    "convert",
+    "copy",
+    "corr",
+    "corresponding",
+    "cos",
+    "cosh",
+    "cost",
+    "count",
+    "covar_pop",
+    "covar_samp",
+    "create",
+    "cross",
+    "cube",
+    "cume_dist",
+    "current",
+    "current_catalog",
+    "current_date",
+    "current_default_transform_group",
+    "current_path",
+    "current_role",
+    "current_row",
+    "current_schema",
+    "current_time",
+    "current_timestamp",
+    "current_transform_group_for_type",
+    "current_user",
+    "cursor",
+    "cycle",
+    "data",
+    "database",
+    "databases",
+    "date",
+    "day",
+    "dbproperties",
+    "deallocate",
+    "dec",
+    "decfloat",
+    "decimal",
+    "declare",
+    "default",
+    "deferrable",
+    "deferred",
+    "define",
+    "defined",
+    "delete",
+    "delimited",
+    "dense_rank",
+    "depth",
+    "deref",
+    "desc",
+    "describe",
+    "descriptor",
+    "deterministic",
+    "dfs",
+    "diagnostics",
+    "directories",
+    "directory",
+    "disconnect",
+    "distinct",
+    "distribute",
+    "div",
+    "do",
+    "domain",
+    "double",
+    "drop",
+    "dynamic",
+    "each",
+    "element",
+    "else",
+    "elseif",
+    "empty",
+    "end",
+    "end_frame",
+    "end_partition",
+    "equals",
+    "escape",
+    "escaped",
+    "every",
+    "except",
+    "exception",
+    "exchange",
+    "exec",
+    "execute",
+    "exists",
+    "exit",
+    "exp",
+    "explain",
+    "export",
+    "extended",
+    "external",
+    "extract",
+    "false",
+    "fetch",
+    "fields",
+    "fileformat",
+    "filter",
+    "first",
+    "first_value",
+    "float",
+    "following",
+    "for",
+    "foreign",
+    "format",
+    "formatted",
+    "found",
+    "frame_row",
+    "free",
+    "from",
+    "full",
+    "function",
+    "functions",
+    "fusion",
+    "general",
+    "get",
+    "global",
+    "go",
+    "goto",
+    "grant",
+    "group",
+    "grouping",
+    "groups",
+    "handler",
+    "having",
+    "hold",
+    "hour",
+    "identity",
+    "if",
+    "ignore",
+    "immediate",
+    "import",
+    "in",
+    "index",
+    "indexes",
+    "indicator",
+    "initial",
+    "initially",
+    "inner",
+    "inout",
+    "inpath",
+    "input",
+    "inputformat",
+    "insensitive",
+    "insert",
+    "int",
+    "integer",
+    "intersect",
+    "intersection",
+    "interval",
+    "into",
+    "is",
+    "isolation",
+    "items",
+    "iterate",
+    "join",
+    "json_array",
+    "json_arrayagg",
+    "json_exists",
+    "json_object",
+    "json_objectagg",
+    "json_query",
+    "json_table",
+    "json_table_primitive",
+    "json_value",
+    "key",
+    "keys",
+    "lag",
+    "language",
+    "large",
+    "last",
+    "last_value",
+    "lateral",
+    "lazy",
+    "lead",
+    "leading",
+    "leave",
+    "left",
+    "level",
+    "like",
+    "like_regex",
+    "limit",
+    "lines",
+    "list",
+    "listagg",
+    "ln",
+    "load",
+    "local",
+    "localtime",
+    "localtimestamp",
+    "location",
+    "locator",
+    "lock",
+    "locks",
+    "log",
+    "log10",
+    "logical",
+    "loop",
+    "lower",
+    "macro",
+    "map",
+    "match",
+    "match_number",
+    "match_recognize",
+    "matches",
+    "max",
+    "member",
+    "merge",
+    "method",
+    "min",
+    "minus",
+    "minute",
+    "mod",
+    "modifies",
+    "module",
+    "month",
+    "msck",
+    "multiset",
+    "names",
+    "national",
+    "natural",
+    "nchar",
+    "nclob",
+    "new",
+    "next",
+    "no",
+    "none",
+    "normalize",
+    "not",
+    "nth_value",
+    "ntile",
+    "null",
+    "nullif",
+    "nulls",
+    "numeric",
+    "object",
+    "occurrences_regex",
+    "octet_length",
+    "of",
+    "offset",
+    "old",
+    "omit",
+    "on",
+    "one",
+    "only",
+    "open",
+    "option",
+    "options",
+    "or",
+    "order",
+    "ordinality",
+    "out",
+    "outer",
+    "output",
+    "outputformat",
+    "over",
+    "overlaps",
+    "overlay",
+    "overwrite",
+    "pad",
+    "parameter",
+    "partial",
+    "partition",
+    "partitioned",
+    "partitions",
+    "path",
+    "pattern",
+    "per",
+    "percent",
+    "percent_rank",
+    "percentile_cont",
+    "percentile_disc",
+    "percentlit",
+    "period",
+    "pivot",
+    "portion",
+    "power",
+    "precedes",
+    "preceding",
+    "precision",
+    "prepare",
+    "preserve",
+    "primary",
+    "principals",
+    "prior",
+    "privileges",
+    "procedure",
+    "ptf",
+    "public",
+    "purge",
+    "range",
+    "rank",
+    "read",
+    "reads",
+    "real",
+    "recordreader",
+    "recordwriter",
+    "recover",
+    "recursive",
+    "reduce",
+    "ref",
+    "references",
+    "referencing",
+    "refresh",
+    "regr_avgx",
+    "regr_avgy",
+    "regr_count",
+    "regr_intercept",
+    "regr_r2",
+    "regr_slope",
+    "regr_sxx",
+    "regr_sxy",
+    "regr_syy",
+    "relative",
+    "release",
+    "rename",
+    "repair",
+    "repeat",
+    "replace",
+    "reset",
+    "resignal",
+    "restrict",
+    "result",
+    "return",
+    "returns",
+    "revoke",
+    "right",
+    "rlike",
+    "role",
+    "roles",
+    "rollback",
+    "rollup",
+    "routine",
+    "row",
+    "row_number",
+    "rows",
+    "running",
+    "savepoint",
+    "schema",
+    "scope",
+    "scroll",
+    "search",
+    "second",
+    "section",
+    "seek",
+    "select",
+    "semi",
+    "sensitive",
+    "separated",
+    "serde",
+    "serdeproperties",
+    "session",
+    "session_user",
+    "set",
+    "sets",
+    "show",
+    "signal",
+    "similar",
+    "sin",
+    "sinh",
+    "size",
+    "skewed",
+    "skip",
+    "smallint",
+    "some",
+    "sort",
+    "sorted",
+    "space",
+    "specific",
+    "specifictype",
+    "sql",
+    "sqlcode",
+    "sqlerror",
+    "sqlexception",
+    "sqlstate",
+    "sqlwarning",
+    "sqrt",
+    "start",
+    "state",
+    "static",
+    "statistics",
+    "stddev_pop",
+    "stddev_samp",
+    "stored",
+    "stratify",
+    "struct",
+    "submultiset",
+    "subset",
+    "substring",
+    "substring_regex",
+    "succeeds",
+    "sum",
+    "symmetric",
+    "system",
+    "system_time",
+    "system_user",
+    "table",
+    "tables",
+    "tablesample",
+    "tan",
+    "tanh",
+    "tblproperties",
+    "temporary",
+    "terminated",
+    "then",
+    "time",
+    "timestamp",
+    "timezone_hour",
+    "timezone_minute",
+    "to",
+    "touch",
+    "trailing",
+    "transaction",
+    "transactions",
+    "transform",
+    "translate",
+    "translate_regex",
+    "translation",
+    "treat",
+    "trigger",
+    "trim",
+    "trim_array",
+    "true",
+    "truncate",
+    "uescape",
+    "unarchive",
+    "unbounded",
+    "uncache",
+    "under",
+    "undo",
+    "union",
+    "unique",
+    "unknown",
+    "unlock",
+    "unnest",
+    "unset",
+    "until",
+    "update",
+    "upper",
+    "usage",
+    "use",
+    "user",
+    "using",
+    "value",
+    "value_of",
+    "values",
+    "var_pop",
+    "var_samp",
+    "varbinary",
+    "varchar",
+    "varying",
+    "versioning",
+    "view",
+    "when",
+    "whenever",
+    "where",
+    "while",
+    "width_bucket",
+    "window",
+    "with",
+    "within",
+    "without",
+    "work",
+    "write",
+    "year",
+    "zone")
+
+  val reservedKeywordsInAnsiMode = Set(
+    "all",
+    "and",
+    "anti",
+    "any",
+    "as",
+    "authorization",
+    "both",
+    "case",
+    "cast",
+    "check",
+    "collate",
+    "column",
+    "constraint",
+    "create",
+    "cross",
+    "current_date",
+    "current_time",
+    "current_timestamp",
+    "current_user",
+    "distinct",
+    "else",
+    "end",
+    "except",
+    "false",
+    "fetch",
+    "for",
+    "foreign",
+    "from",
+    "full",
+    "grant",
+    "group",
+    "having",
+    "in",
+    "inner",
+    "intersect",
+    "into",
+    "join",
+    "is",
+    "leading",
+    "left",
+    "natural",
+    "not",
+    "null",
+    "on",
+    "only",
+    "or",
+    "order",
+    "outer",
+    "overlaps",
+    "primary",
+    "references",
+    "right",
+    "select",
+    "semi",
+    "session_user",
+    "minus",
+    "some",
+    "table",
+    "then",
+    "to",
+    "trailing",
+    "union",
+    "unique",
+    "user",
+    "using",
+    "when",
+    "where",
+    "with")
+
+  val nonReservedKeywordsInAnsiMode = allCandidateKeywords -- 
reservedKeywordsInAnsiMode
+
   test("table identifier") {
     // Regular names.
     assert(TableIdentifier("q") === parseTableIdentifier("q"))
@@ -300,6 +931,23 @@ class TableIdentifierParserSuite extends SparkFunSuite {
     assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`"))
   }
 
+  test("table identifier - reserved/non-reserved keywords if ANSI mode 
enabled") {
+    withSQLConf(SQLConf.ANSI_SQL_PARSER.key -> "true") {
+      reservedKeywordsInAnsiMode.foreach { keyword =>
+        val errMsg = intercept[ParseException] {
+          parseTableIdentifier(keyword)
+        }.getMessage
+        assert(errMsg.contains("no viable alternative at input"))
+        assert(TableIdentifier(keyword) === 
parseTableIdentifier(s"`$keyword`"))
+        assert(TableIdentifier(keyword, Option("db")) === 
parseTableIdentifier(s"db.`$keyword`"))
+      }
+      nonReservedKeywordsInAnsiMode.foreach { keyword =>
+        assert(TableIdentifier(keyword) === parseTableIdentifier(s"$keyword"))
+        assert(TableIdentifier(keyword, Option("db")) === 
parseTableIdentifier(s"db.$keyword"))
+      }
+    }
+  }
+
   test("table identifier - strict keywords") {
     // SQL Keywords.
     hiveStrictNonReservedKeyword.foreach { keyword =>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch master updated: [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on

Reply via email to