This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1e9469b [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on 1e9469b is described below commit 1e9469bb7a71b06d610edaaebca933f4219a6eb3 Author: Takeshi Yamamuro <yamam...@apache.org> AuthorDate: Wed Mar 13 11:20:27 2019 +0900 [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on ## What changes were proposed in this pull request? This PR adds code to forbid reserved keywords as identifiers when ANSI mode is on. This is a follow-up of SPARK-26215 (#23259). ## How was this patch tested? Added tests in `TableIdentifierParserSuite`. Closes #23880 from maropu/SPARK-26976. Authored-by: Takeshi Yamamuro <yamam...@apache.org> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 84 --- .../parser/TableIdentifierParserSuite.scala | 650 ++++++++++++++++++++- 2 files changed, 649 insertions(+), 85 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index c61cda8..d11c28c 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -736,7 +736,6 @@ qualifiedName identifier : strictIdentifier - | {ansi}? ansiReserved | {!ansi}? defaultReserved ; @@ -761,89 +760,6 @@ number | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral ; -// NOTE: You must follow a rule below when you add a new ANTLR token in this file: -// - All the ANTLR tokens = UNION(`ansiReserved`, `ansiNonReserved`) = UNION(`defaultReserved`, `nonReserved`) -// -// Let's say you add a new token `NEWTOKEN` and this is not reserved regardless of a `spark.sql.parser.ansi.enabled` -// value. In this case, you must add a token `NEWTOKEN` in both `ansiNonReserved` and `nonReserved`. 
-// -// It is recommended to list them in alphabetical order. - -// The list of the reserved keywords when `spark.sql.parser.ansi.enabled` is true. Currently, we only reserve -// the ANSI keywords that almost all the ANSI SQL standards (SQL-92, SQL-99, SQL-2003, SQL-2008, SQL-2011, -// and SQL-2016) and PostgreSQL reserve. -ansiReserved - : ALL - | AND - | ANTI - | ANY - | AS - | AUTHORIZATION - | BOTH - | CASE - | CAST - | CHECK - | COLLATE - | COLUMN - | CONSTRAINT - | CREATE - | CROSS - | CURRENT_DATE - | CURRENT_TIME - | CURRENT_TIMESTAMP - | CURRENT_USER - | DISTINCT - | ELSE - | END - | EXCEPT - | FALSE - | FETCH - | FOR - | FOREIGN - | FROM - | FULL - | GRANT - | GROUP - | HAVING - | IN - | INNER - | INTERSECT - | INTO - | IS - | JOIN - | LEADING - | LEFT - | NATURAL - | NOT - | NULL - | ON - | ONLY - | OR - | ORDER - | OUTER - | OVERLAPS - | PRIMARY - | REFERENCES - | RIGHT - | SELECT - | SEMI - | SESSION_USER - | SETMINUS - | SOME - | TABLE - | THEN - | TO - | TRAILING - | UNION - | UNIQUE - | USER - | USING - | WHEN - | WHERE - | WITH - ; - - // The list of the non-reserved keywords when `spark.sql.parser.ansi.enabled` is true. 
ansiNonReserved : ADD diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 3d41c27..2725deb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.internal.SQLConf -class TableIdentifierParserSuite extends SparkFunSuite { +class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper { import CatalystSqlParser._ // Add "$elem$", "$value$" & "$key$" @@ -281,6 +283,635 @@ class TableIdentifierParserSuite extends SparkFunSuite { "where", "with") + // All the keywords in `docs/sql-reserved-and-non-reserved-key-words.md` are listed below: + val allCandidateKeywords = Set( + "abs", + "absolute", + "acos", + "action", + "add", + "after", + "all", + "allocate", + "alter", + "analyze", + "and", + "anti", + "any", + "archive", + "are", + "array", + "array_agg", + "array_max_cardinality", + "as", + "asc", + "asensitive", + "asin", + "assertion", + "asymmetric", + "at", + "atan", + "atomic", + "authorization", + "avg", + "before", + "begin", + "begin_frame", + "begin_partition", + "between", + "bigint", + "binary", + "bit", + "bit_length", + "blob", + "boolean", + "both", + "breadth", + "bucket", + "buckets", + "by", + "cache", + "call", + "called", + "cardinality", + "cascade", + "cascaded", + "case", + "cast", + "catalog", + "ceil", + "ceiling", + "change", + "char", + "char_length", + "character", + "character_length", + "check", + "classifier", + "clear", + "clob", + "close", + "cluster", + "clustered", + 
"coalesce", + "codegen", + "collate", + "collation", + "collect", + "collection", + "column", + "columns", + "comment", + "commit", + "compact", + "compactions", + "compute", + "concatenate", + "condition", + "connect", + "connection", + "constraint", + "constraints", + "constructor", + "contains", + "continue", + "convert", + "copy", + "corr", + "corresponding", + "cos", + "cosh", + "cost", + "count", + "covar_pop", + "covar_samp", + "create", + "cross", + "cube", + "cume_dist", + "current", + "current_catalog", + "current_date", + "current_default_transform_group", + "current_path", + "current_role", + "current_row", + "current_schema", + "current_time", + "current_timestamp", + "current_transform_group_for_type", + "current_user", + "cursor", + "cycle", + "data", + "database", + "databases", + "date", + "day", + "dbproperties", + "deallocate", + "dec", + "decfloat", + "decimal", + "declare", + "default", + "deferrable", + "deferred", + "define", + "defined", + "delete", + "delimited", + "dense_rank", + "depth", + "deref", + "desc", + "describe", + "descriptor", + "deterministic", + "dfs", + "diagnostics", + "directories", + "directory", + "disconnect", + "distinct", + "distribute", + "div", + "do", + "domain", + "double", + "drop", + "dynamic", + "each", + "element", + "else", + "elseif", + "empty", + "end", + "end_frame", + "end_partition", + "equals", + "escape", + "escaped", + "every", + "except", + "exception", + "exchange", + "exec", + "execute", + "exists", + "exit", + "exp", + "explain", + "export", + "extended", + "external", + "extract", + "false", + "fetch", + "fields", + "fileformat", + "filter", + "first", + "first_value", + "float", + "following", + "for", + "foreign", + "format", + "formatted", + "found", + "frame_row", + "free", + "from", + "full", + "function", + "functions", + "fusion", + "general", + "get", + "global", + "go", + "goto", + "grant", + "group", + "grouping", + "groups", + "handler", + "having", + "hold", + "hour", + "identity", + 
"if", + "ignore", + "immediate", + "import", + "in", + "index", + "indexes", + "indicator", + "initial", + "initially", + "inner", + "inout", + "inpath", + "input", + "inputformat", + "insensitive", + "insert", + "int", + "integer", + "intersect", + "intersection", + "interval", + "into", + "is", + "isolation", + "items", + "iterate", + "join", + "json_array", + "json_arrayagg", + "json_exists", + "json_object", + "json_objectagg", + "json_query", + "json_table", + "json_table_primitive", + "json_value", + "key", + "keys", + "lag", + "language", + "large", + "last", + "last_value", + "lateral", + "lazy", + "lead", + "leading", + "leave", + "left", + "level", + "like", + "like_regex", + "limit", + "lines", + "list", + "listagg", + "ln", + "load", + "local", + "localtime", + "localtimestamp", + "location", + "locator", + "lock", + "locks", + "log", + "log10", + "logical", + "loop", + "lower", + "macro", + "map", + "match", + "match_number", + "match_recognize", + "matches", + "max", + "member", + "merge", + "method", + "min", + "minus", + "minute", + "mod", + "modifies", + "module", + "month", + "msck", + "multiset", + "names", + "national", + "natural", + "nchar", + "nclob", + "new", + "next", + "no", + "none", + "normalize", + "not", + "nth_value", + "ntile", + "null", + "nullif", + "nulls", + "numeric", + "object", + "occurrences_regex", + "octet_length", + "of", + "offset", + "old", + "omit", + "on", + "one", + "only", + "open", + "option", + "options", + "or", + "order", + "ordinality", + "out", + "outer", + "output", + "outputformat", + "over", + "overlaps", + "overlay", + "overwrite", + "pad", + "parameter", + "partial", + "partition", + "partitioned", + "partitions", + "path", + "pattern", + "per", + "percent", + "percent_rank", + "percentile_cont", + "percentile_disc", + "percentlit", + "period", + "pivot", + "portion", + "power", + "precedes", + "preceding", + "precision", + "prepare", + "preserve", + "primary", + "principals", + "prior", + "privileges", + 
"procedure", + "ptf", + "public", + "purge", + "range", + "rank", + "read", + "reads", + "real", + "recordreader", + "recordwriter", + "recover", + "recursive", + "reduce", + "ref", + "references", + "referencing", + "refresh", + "regr_avgx", + "regr_avgy", + "regr_count", + "regr_intercept", + "regr_r2", + "regr_slope", + "regr_sxx", + "regr_sxy", + "regr_syy", + "relative", + "release", + "rename", + "repair", + "repeat", + "replace", + "reset", + "resignal", + "restrict", + "result", + "return", + "returns", + "revoke", + "right", + "rlike", + "role", + "roles", + "rollback", + "rollup", + "routine", + "row", + "row_number", + "rows", + "running", + "savepoint", + "schema", + "scope", + "scroll", + "search", + "second", + "section", + "seek", + "select", + "semi", + "sensitive", + "separated", + "serde", + "serdeproperties", + "session", + "session_user", + "set", + "sets", + "show", + "signal", + "similar", + "sin", + "sinh", + "size", + "skewed", + "skip", + "smallint", + "some", + "sort", + "sorted", + "space", + "specific", + "specifictype", + "sql", + "sqlcode", + "sqlerror", + "sqlexception", + "sqlstate", + "sqlwarning", + "sqrt", + "start", + "state", + "static", + "statistics", + "stddev_pop", + "stddev_samp", + "stored", + "stratify", + "struct", + "submultiset", + "subset", + "substring", + "substring_regex", + "succeeds", + "sum", + "symmetric", + "system", + "system_time", + "system_user", + "table", + "tables", + "tablesample", + "tan", + "tanh", + "tblproperties", + "temporary", + "terminated", + "then", + "time", + "timestamp", + "timezone_hour", + "timezone_minute", + "to", + "touch", + "trailing", + "transaction", + "transactions", + "transform", + "translate", + "translate_regex", + "translation", + "treat", + "trigger", + "trim", + "trim_array", + "true", + "truncate", + "uescape", + "unarchive", + "unbounded", + "uncache", + "under", + "undo", + "union", + "unique", + "unknown", + "unlock", + "unnest", + "unset", + "until", + "update", + 
"upper", + "usage", + "use", + "user", + "using", + "value", + "value_of", + "values", + "var_pop", + "var_samp", + "varbinary", + "varchar", + "varying", + "versioning", + "view", + "when", + "whenever", + "where", + "while", + "width_bucket", + "window", + "with", + "within", + "without", + "work", + "write", + "year", + "zone") + + val reservedKeywordsInAnsiMode = Set( + "all", + "and", + "anti", + "any", + "as", + "authorization", + "both", + "case", + "cast", + "check", + "collate", + "column", + "constraint", + "create", + "cross", + "current_date", + "current_time", + "current_timestamp", + "current_user", + "distinct", + "else", + "end", + "except", + "false", + "fetch", + "for", + "foreign", + "from", + "full", + "grant", + "group", + "having", + "in", + "inner", + "intersect", + "into", + "join", + "is", + "leading", + "left", + "natural", + "not", + "null", + "on", + "only", + "or", + "order", + "outer", + "overlaps", + "primary", + "references", + "right", + "select", + "semi", + "session_user", + "minus", + "some", + "table", + "then", + "to", + "trailing", + "union", + "unique", + "user", + "using", + "when", + "where", + "with") + + val nonReservedKeywordsInAnsiMode = allCandidateKeywords -- reservedKeywordsInAnsiMode + test("table identifier") { // Regular names. 
assert(TableIdentifier("q") === parseTableIdentifier("q")) @@ -300,6 +931,23 @@ class TableIdentifierParserSuite extends SparkFunSuite { assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`")) } + test("table identifier - reserved/non-reserved keywords if ANSI mode enabled") { + withSQLConf(SQLConf.ANSI_SQL_PARSER.key -> "true") { + reservedKeywordsInAnsiMode.foreach { keyword => + val errMsg = intercept[ParseException] { + parseTableIdentifier(keyword) + }.getMessage + assert(errMsg.contains("no viable alternative at input")) + assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`")) + assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`")) + } + nonReservedKeywordsInAnsiMode.foreach { keyword => + assert(TableIdentifier(keyword) === parseTableIdentifier(s"$keyword")) + assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.$keyword")) + } + } + } + test("table identifier - strict keywords") { // SQL Keywords. hiveStrictNonReservedKeyword.foreach { keyword => --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org