I propose the following patch (which goes on top of the current
ordinality one) to implement the suggested grammar changes.
I think this is the cleanest way, and I've tested that it both
passes regression and allows constructs like WITH time AS (...)
to work.
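For illustration, this is roughly the sort of query that the old combined-token
approach rejected and that the patch is meant to accept (a minimal, made-up
example; the point is only that "time" is usable as a CTE name):

    WITH time AS (
        SELECT now() AS t
    )
    SELECT t FROM time;

Previously the lexer filter folded WITH TIME into the single WITH_TIME token,
so the parser never saw a plain WITH here; with the WITH_BEFORE_TIME scheme
the leading keyword still reduces through with_keyword and the CTE parses as
usual.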
--
Andrew (irc:RhodiumToad)
*** a/src/backend/parser/gram.y
--- b/src/backend/parser/gram.y
***************
*** 608,615 **** static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
* The grammar thinks these are keywords, but they are not in the kwlist.h
* list and so can never be entered directly. The filter in parser.c
* creates these tokens when required.
*/
! %token NULLS_FIRST NULLS_LAST WITH_ORDINALITY WITH_TIME
/* Precedence: lowest to highest */
%nonassoc SET /* see relation_expr_opt_alias */
--- 608,645 ----
* The grammar thinks these are keywords, but they are not in the kwlist.h
* list and so can never be entered directly. The filter in parser.c
* creates these tokens when required.
+ *
+ * The rules for referencing WITH and these special lookahead keywords are
+ * as follows:
+ *
+ * If WITH is followed by a fixed token, such as WITH OIDS, or a non-keyword
+ * token such as '(', then use WITH directly, except as indicated below.
+ *
+ * If WITH could be followed by an object name, then use the with_keyword
+ * production instead. Also, if there are alternative branches in which some
+ * have a fixed keyword following WITH and some have an object name, then
+ * use with_keyword for all of them, overriding the above rule.
+ *
+ * (Similar rules would apply for NULLS_P, but currently there are no
+ * instances in the grammar where this is used other than as a special
+ * case or as an identifier.)
+ *
+ * The productions associated with these special cases are listed under
+ * "Special-case keyword sequences" near the end of the grammar. It is
+ * intended that these be the ONLY places that the special lookahead
+ * keywords appear, in order to avoid complicating the main body of the
+ * grammar.
+ *
+ * To add a new special case:
+ * - add the special token names here in a %token decl
+ * - add or extend the productions under "Special-case keyword sequences"
+ * - add appropriate comparisons in:
+ * base_yylex in src/backend/parser/parser.c
+ * filtered_base_yylex in src/interfaces/ecpg/preproc/parser.c
*/
!
! %token NULLS_BEFORE_FIRST NULLS_BEFORE_LAST
! %token WITH_BEFORE_ORDINALITY WITH_BEFORE_TIME
/* Precedence: lowest to highest */
%nonassoc SET /* see relation_expr_opt_alias */
***************
*** 838,848 **** CreateRoleStmt:
}
;
-
- opt_with: WITH {}
- | /*EMPTY*/ {}
- ;
-
/*
* Options for CREATE ROLE and ALTER ROLE (also used by CREATE/ALTER USER
* for backwards compatibility). Note: the only option required by SQL99
--- 868,873 ----
***************
*** 3127,3138 **** ExclusionConstraintList:
{ $$ = lappend($1, $3); }
;
! ExclusionConstraintElem: index_elem WITH any_operator
{
$$ = list_make2($1, $3);
}
/* allow OPERATOR() decoration for the benefit of ruleutils.c */
! | index_elem WITH OPERATOR '(' any_operator ')'
{
$$ = list_make2($1, $5);
}
--- 3152,3163 ----
{ $$ = lappend($1, $3); }
;
! ExclusionConstraintElem: index_elem with_keyword any_operator
{
$$ = list_make2($1, $3);
}
/* allow OPERATOR() decoration for the benefit of ruleutils.c */
! | index_elem with_keyword OPERATOR '(' any_operator ')'
{
$$ = list_make2($1, $5);
}
***************
*** 6188,6195 **** opt_asc_desc: ASC
{ $$ = SORTBY_ASC; }
| /*EMPTY*/ { $$ = SORTBY_DEFAULT; }
;
! opt_nulls_order: NULLS_FIRST { $$ = SORTBY_NULLS_FIRST; }
! | NULLS_LAST { $$ = SORTBY_NULLS_LAST; }
| /*EMPTY*/ { $$ = SORTBY_NULLS_DEFAULT; }
;
--- 6213,6220 ----
| /*EMPTY*/ { $$ = SORTBY_DEFAULT; }
;
! opt_nulls_order: nulls_first { $$ = SORTBY_NULLS_FIRST; }
! | nulls_last { $$ = SORTBY_NULLS_LAST; }
| /*EMPTY*/ { $$ = SORTBY_NULLS_DEFAULT; }
;
***************
*** 8348,8354 **** AlterTSDictionaryStmt:
;
AlterTSConfigurationStmt:
! ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list WITH any_name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
--- 8373,8379 ----
;
AlterTSConfigurationStmt:
! ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list with_keyword any_name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
***************
*** 8358,8364 **** AlterTSConfigurationStmt:
n->replace = false;
$$ = (Node*)n;
}
! | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list WITH any_name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
--- 8383,8389 ----
n->replace = false;
$$ = (Node*)n;
}
! | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list with_keyword any_name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
***************
*** 8368,8374 **** AlterTSConfigurationStmt:
n->replace = false;
$$ = (Node*)n;
}
! | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING REPLACE any_name WITH any_name
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
--- 8393,8399 ----
n->replace = false;
$$ = (Node*)n;
}
! | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING REPLACE any_name with_keyword any_name
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
***************
*** 8378,8384 **** AlterTSConfigurationStmt:
n->replace = true;
$$ = (Node*)n;
}
! | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list REPLACE any_name WITH any_name
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
--- 8403,8409 ----
n->replace = true;
$$ = (Node*)n;
}
! | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list REPLACE any_name with_keyword any_name
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
***************
*** 9247,9260 **** simple_select:
* We don't currently support the SEARCH or CYCLE clause.
*/
with_clause:
! WITH cte_list
{
$$ = makeNode(WithClause);
$$->ctes = $2;
$$->recursive = false;
$$->location = @1;
}
! | WITH RECURSIVE cte_list
{
$$ = makeNode(WithClause);
$$->ctes = $3;
--- 9272,9285 ----
* We don't currently support the SEARCH or CYCLE clause.
*/
with_clause:
! with_keyword cte_list
{
$$ = makeNode(WithClause);
$$->ctes = $2;
$$->recursive = false;
$$->location = @1;
}
! | with_keyword RECURSIVE cte_list
{
$$ = makeNode(WithClause);
$$->ctes = $3;
***************
*** 9593,9599 **** table_ref: relation_expr opt_alias_clause
n->coldeflist = lsecond($2);
$$ = (Node *) n;
}
! | func_table WITH_ORDINALITY func_alias_clause
{
RangeFunction *n = makeNode(RangeFunction);
n->lateral = false;
--- 9618,9624 ----
n->coldeflist = lsecond($2);
$$ = (Node *) n;
}
! | func_table with_ordinality func_alias_clause
{
RangeFunction *n = makeNode(RangeFunction);
n->lateral = false;
***************
*** 9613,9619 **** table_ref: relation_expr opt_alias_clause
n->coldeflist = lsecond($3);
$$ = (Node *) n;
}
! | LATERAL_P func_table WITH_ORDINALITY func_alias_clause
{
RangeFunction *n = makeNode(RangeFunction);
n->lateral = true;
--- 9638,9644 ----
n->coldeflist = lsecond($3);
$$ = (Node *) n;
}
! | LATERAL_P func_table with_ordinality func_alias_clause
{
RangeFunction *n = makeNode(RangeFunction);
n->lateral = true;
***************
*** 10413,10419 **** ConstInterval:
;
opt_timezone:
! WITH_TIME ZONE { $$ = TRUE; }
| WITHOUT TIME ZONE { $$ = FALSE; }
| /*EMPTY*/ { $$ = FALSE; }
;
--- 10438,10444 ----
;
opt_timezone:
! with_time ZONE { $$ = TRUE; }
| WITHOUT TIME ZONE { $$ = FALSE; }
| /*EMPTY*/ { $$ = FALSE; }
;
***************
*** 12431,12436 **** ColLabel: IDENT
{ $$ = $1; }
--- 12456,12499 ----
| reserved_keyword { $$ = pstrdup($1); }
;
+ /*
+ * Special-case keyword sequences.
+ *
+ * To disambiguate WITH TIME, WITH ORDINALITY, NULLS FIRST, NULLS
+ * LAST, which otherwise cause conflicts, the lexer looks ahead one
+ * extra token and may replace the WITH or NULLS keyword by
+ * WITH_BEFORE_* or NULLS_BEFORE_* (the following keyword is not
+ * touched).
+ *
+ * These productions collect the special cases in one place; see the
+ * token declarations at the top of the file for the rules used.
+ */
+
+ opt_with: with_keyword
+ | /*EMPTY*/
+ ;
+
+ with_keyword: WITH
+ | WITH_BEFORE_TIME
+ | WITH_BEFORE_ORDINALITY
+ ;
+
+ with_ordinality: WITH_BEFORE_ORDINALITY ORDINALITY ;
+
+ with_time: WITH_BEFORE_TIME TIME ;
+
+ /*
+ * not needed since NULLS never occurs alone:
+ *
+ * nulls_keyword: NULLS_P
+ * | NULLS_BEFORE_FIRST
+ * | NULLS_BEFORE_LAST
+ * ;
+ */
+
+ nulls_first: NULLS_BEFORE_FIRST FIRST_P ;
+
+ nulls_last: NULLS_BEFORE_LAST LAST_P ;
/*
* Keyword category lists. Generally, every keyword present in
*** a/src/backend/parser/parser.c
--- b/src/backend/parser/parser.c
***************
*** 65,72 **** raw_parser(const char *str)
* Intermediate filter between parser and core lexer (core_yylex in scan.l).
*
* The filter is needed because in some cases the standard SQL grammar
! * requires more than one token lookahead. We reduce these cases to one-token
! * lookahead by combining tokens here, in order to keep the grammar LALR(1).
*
* Using a filter is simpler than trying to recognize multiword tokens
* directly in scan.l, because we'd have to allow for comments between the
--- 65,80 ----
* Intermediate filter between parser and core lexer (core_yylex in scan.l).
*
* The filter is needed because in some cases the standard SQL grammar
! * requires more than one token lookahead. We reduce these cases to
! * one-token lookahead by adding one extra token of lookahead here, and
! * altering the keyword returned based on what follows, in order to keep
! * the grammar LALR(1).
! *
! * We used to combine keywords, but no longer do. Now, keywords that would
! * cause conflicts have special <keyword>_BEFORE_<lookahead> forms, which
! * flag up in advance that the next token is <lookahead>. This is more
! * flexible when handling some of the edge cases such as
! * WITH ordinality AS (...)
*
* Using a filter is simpler than trying to recognize multiword tokens
* directly in scan.l, because we'd have to allow for comments between the
***************
*** 98,167 **** base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
else
cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
! /* Do we need to look ahead for a possible multiword token? */
! switch (cur_token)
! {
! case NULLS_P:
!
! /*
! * NULLS FIRST and NULLS LAST must be reduced to one token
! */
! cur_yylval = lvalp->core_yystype;
! cur_yylloc = *llocp;
! next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
! switch (next_token)
! {
! case FIRST_P:
! cur_token = NULLS_FIRST;
! break;
! case LAST_P:
! cur_token = NULLS_LAST;
! break;
! default:
! /* save the lookahead token for next time */
! yyextra->lookahead_token = next_token;
! yyextra->lookahead_yylval = lvalp->core_yystype;
! yyextra->lookahead_yylloc = *llocp;
! yyextra->have_lookahead = true;
! /* and back up the output info to cur_token */
! lvalp->core_yystype = cur_yylval;
! *llocp = cur_yylloc;
! break;
! }
! break;
!
! case WITH:
!
! /*
! * WITH TIME and WITH ORDINALITY must each be reduced to one token
! */
! cur_yylval = lvalp->core_yystype;
! cur_yylloc = *llocp;
! next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
! switch (next_token)
! {
! case TIME:
! cur_token = WITH_TIME;
! break;
! case ORDINALITY:
! cur_token = WITH_ORDINALITY;
! break;
! default:
! /* save the lookahead token for next time */
! yyextra->lookahead_token = next_token;
! yyextra->lookahead_yylval = lvalp->core_yystype;
! yyextra->lookahead_yylloc = *llocp;
! yyextra->have_lookahead = true;
! /* and back up the output info to cur_token */
! lvalp->core_yystype = cur_yylval;
! *llocp = cur_yylloc;
! break;
! }
! break;
!
! default:
! break;
! }
return cur_token;
}
--- 106,151 ----
else
cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
! /*
! * Do we need to look ahead for a possible multiword token?
! * If not, we're done here.
! */
!
! if (cur_token != NULLS_P && cur_token != WITH)
! return cur_token;
!
! /* Fetch the lookahead token */
!
! cur_yylval = lvalp->core_yystype;
! cur_yylloc = *llocp;
! next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
!
! /* save the lookahead token for next time */
!
! yyextra->lookahead_token = next_token;
! yyextra->lookahead_yylval = lvalp->core_yystype;
! yyextra->lookahead_yylloc = *llocp;
! yyextra->have_lookahead = true;
!
! /* and back up the output info to cur_token */
!
! lvalp->core_yystype = cur_yylval;
! *llocp = cur_yylloc;
!
! /*
! * We don't merge the two tokens into one, but just modify the value of
! * the leading token to reflect what follows.
! */
!
! if (cur_token == NULLS_P && next_token == FIRST_P)
! return NULLS_BEFORE_FIRST;
! else if (cur_token == NULLS_P && next_token == LAST_P)
! return NULLS_BEFORE_LAST;
!
! if (cur_token == WITH && next_token == TIME)
! return WITH_BEFORE_TIME;
! else if (cur_token == WITH && next_token == ORDINALITY)
! return WITH_BEFORE_ORDINALITY;
return cur_token;
}
*** a/src/interfaces/ecpg/preproc/parser.c
--- b/src/interfaces/ecpg/preproc/parser.c
***************
*** 35,42 **** static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */
* Intermediate filter between parser and base lexer (base_yylex in scan.l).
*
* The filter is needed because in some cases the standard SQL grammar
! * requires more than one token lookahead. We reduce these cases to one-token
! * lookahead by combining tokens here, in order to keep the grammar LALR(1).
*
* Using a filter is simpler than trying to recognize multiword tokens
* directly in scan.l, because we'd have to allow for comments between the
--- 35,50 ----
* Intermediate filter between parser and base lexer (base_yylex in scan.l).
*
* The filter is needed because in some cases the standard SQL grammar
! * requires more than one token lookahead. We reduce these cases to
! * one-token lookahead by adding one extra token of lookahead here, and
! * altering the keyword returned based on what follows, in order to keep
! * the grammar LALR(1).
! *
! * We used to combine keywords, but no longer do. Now, keywords that would
! * cause conflicts have special <keyword>_BEFORE_<lookahead> forms, which
! * flag up in advance that the next token is <lookahead>. This is more
! * flexible when handling some of the edge cases such as
! * WITH ordinality AS (...)
*
* Using a filter is simpler than trying to recognize multiword tokens
* directly in scan.l, because we'd have to allow for comments between the
***************
*** 63,129 **** filtered_base_yylex(void)
else
cur_token = base_yylex();
! /* Do we need to look ahead for a possible multiword token? */
! switch (cur_token)
! {
! case NULLS_P:
!
! /*
! * NULLS FIRST and NULLS LAST must be reduced to one token
! */
! cur_yylval = base_yylval;
! cur_yylloc = base_yylloc;
! next_token = base_yylex();
! switch (next_token)
! {
! case FIRST_P:
! cur_token = NULLS_FIRST;
! break;
! case LAST_P:
! cur_token = NULLS_LAST;
! break;
! default:
! /* save the lookahead token for next time */
! lookahead_token = next_token;
! lookahead_yylval = base_yylval;
! lookahead_yylloc = base_yylloc;
! have_lookahead = true;
! /* and back up the output info to cur_token */
! base_yylval = cur_yylval;
! base_yylloc = cur_yylloc;
! break;
! }
! break;
!
! case WITH:
!
! /*
! * WITH TIME must be reduced to one token
! */
! cur_yylval = base_yylval;
! cur_yylloc = base_yylloc;
! next_token = base_yylex();
! switch (next_token)
! {
! case TIME:
! cur_token = WITH_TIME;
! break;
! default:
! /* save the lookahead token for next time */
! lookahead_token = next_token;
! lookahead_yylval = base_yylval;
! lookahead_yylloc = base_yylloc;
! have_lookahead = true;
! /* and back up the output info to cur_token */
! base_yylval = cur_yylval;
! base_yylloc = cur_yylloc;
! break;
! }
! break;
!
! default:
! break;
! }
return cur_token;
}
--- 71,116 ----
else
cur_token = base_yylex();
! /*
! * Do we need to look ahead for a possible multiword token?
! * If not, we're done here.
! */
!
! if (cur_token != NULLS_P && cur_token != WITH)
! return cur_token;
!
! /* fetch the lookahead token */
!
! cur_yylval = base_yylval;
! cur_yylloc = base_yylloc;
! next_token = base_yylex();
!
! /* save the lookahead token for next time */
!
! lookahead_token = next_token;
! lookahead_yylval = base_yylval;
! lookahead_yylloc = base_yylloc;
! have_lookahead = true;
!
! /* and back up the output info to cur_token */
!
! base_yylval = cur_yylval;
! base_yylloc = cur_yylloc;
!
! /*
! * We don't merge the two tokens into one, but just modify the value of
! * the leading token to reflect what follows.
! */
!
! if (cur_token == NULLS_P && next_token == FIRST_P)
! return NULLS_BEFORE_FIRST;
! else if (cur_token == NULLS_P && next_token == LAST_P)
! return NULLS_BEFORE_LAST;
!
! if (cur_token == WITH && next_token == TIME)
! return WITH_BEFORE_TIME;
! else if (cur_token == WITH && next_token == ORDINALITY)
! return WITH_BEFORE_ORDINALITY;
return cur_token;
}